diff --git a/unit8.2/Copy_of_unit8_part2.ipynb b/unit8.2/Copy_of_unit8_part2.ipynb
index d991222..d6bf338 100644
--- a/unit8.2/Copy_of_unit8_part2.ipynb
+++ b/unit8.2/Copy_of_unit8_part2.ipynb
@@ -22,27 +22,11 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"id": "I6_67HfI1CKg"
},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from IPython.display import HTML\n",
"\n",
@@ -256,19 +240,11 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"id": "alxUt7Au-O8e"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Python 3.9.21\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# !pip uninstall numpy\n",
"# !pip install numpy\n",
@@ -293,138 +269,9 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Obtaining file:///root/Lab/sample-factory\n",
- " Installing build dependencies ... \u001b[?25ldone\n",
- "\u001b[?25h Checking if build backend supports build_editable ... \u001b[?25ldone\n",
- "\u001b[?25h Getting requirements to build editable ... \u001b[?25ldone\n",
- "\u001b[?25h Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n",
- "\u001b[?25hRequirement already satisfied: filelock in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (3.18.0)\n",
- "Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (0.29.3)\n",
- "Collecting threadpoolctl>=2.0.0\n",
- " Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n",
- "Collecting gymnasium<1.0,>=0.27\n",
- " Using cached gymnasium-0.29.1-py3-none-any.whl (953 kB)\n",
- "Requirement already satisfied: pyglet in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (1.5.0)\n",
- "Requirement already satisfied: tensorboard>=1.15.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (2.19.0)\n",
- "Collecting numpy<2.0,>=1.18.1\n",
- " Using cached numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
- "Collecting tensorboardx>=2.0\n",
- " Using cached tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n",
- "Collecting wandb>=0.12.9\n",
- " Using cached wandb-0.19.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.9 MB)\n",
- "Requirement already satisfied: psutil>=5.7.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (7.0.0)\n",
- "Requirement already satisfied: torch!=1.13.0,<3.0,>=1.9 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (2.6.0)\n",
- "Requirement already satisfied: opencv-python in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (4.11.0.86)\n",
- "Collecting colorlog\n",
- " Using cached colorlog-6.9.0-py3-none-any.whl (11 kB)\n",
- "Requirement already satisfied: pandas in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sample-factory==2.1.3) (2.2.3)\n",
- "Collecting signal-slot-mp<2.0,>=1.0.3\n",
- " Using cached signal_slot_mp-1.0.5-py3-none-any.whl (13 kB)\n",
- "Requirement already satisfied: importlib-metadata>=4.8.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gymnasium<1.0,>=0.27->sample-factory==2.1.3) (4.13.0)\n",
- "Requirement already satisfied: cloudpickle>=1.2.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gymnasium<1.0,>=0.27->sample-factory==2.1.3) (3.1.1)\n",
- "Requirement already satisfied: farama-notifications>=0.0.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gymnasium<1.0,>=0.27->sample-factory==2.1.3) (0.0.4)\n",
- "Requirement already satisfied: typing-extensions>=4.3.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gymnasium<1.0,>=0.27->sample-factory==2.1.3) (4.12.2)\n",
- "Requirement already satisfied: packaging>=20.9 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (20.9)\n",
- "Requirement already satisfied: pyyaml>=5.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (6.0.2)\n",
- "Requirement already satisfied: requests in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (2.32.3)\n",
- "Requirement already satisfied: tqdm>=4.42.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (4.67.1)\n",
- "Requirement already satisfied: fsspec>=2023.5.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (2025.3.0)\n",
- "Collecting faster-fifo<2.0,>=1.4.4\n",
- " Using cached faster_fifo-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (388 kB)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (3.7)\n",
- "Requirement already satisfied: setuptools>=41.0.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (65.5.0)\n",
- "Requirement already satisfied: grpcio>=1.48.2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (1.71.0)\n",
- "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (0.7.2)\n",
- "Requirement already satisfied: six>1.9 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (1.17.0)\n",
- "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (6.30.1)\n",
- "Requirement already satisfied: werkzeug>=1.0.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (3.1.3)\n",
- "Requirement already satisfied: absl-py>=0.4 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from tensorboard>=1.15.0->sample-factory==2.1.3) (2.2.0)\n",
- "Requirement already satisfied: jinja2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (3.1.6)\n",
- "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (11.6.1.9)\n",
- "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.127)\n",
- "Requirement already satisfied: sympy==1.13.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (1.13.1)\n",
- "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (0.6.2)\n",
- "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (10.3.5.147)\n",
- "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.3.1.170)\n",
- "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.5.8)\n",
- "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.127)\n",
- "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (2.21.5)\n",
- "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (11.2.1.3)\n",
- "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.127)\n",
- "Requirement already satisfied: networkx in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (3.2.1)\n",
- "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.127)\n",
- "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (12.4.127)\n",
- "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (9.1.0.70)\n",
- "Requirement already satisfied: triton==3.2.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (3.2.0)\n",
- "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from sympy==1.13.1->torch!=1.13.0,<3.0,>=1.9->sample-factory==2.1.3) (1.3.0)\n",
- "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from wandb>=0.12.9->sample-factory==2.1.3) (3.1.44)\n",
- "Collecting eval-type-backport\n",
- " Using cached eval_type_backport-0.2.2-py3-none-any.whl (5.8 kB)\n",
- "Requirement already satisfied: click!=8.0.0,>=7.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from wandb>=0.12.9->sample-factory==2.1.3) (8.1.8)\n",
- "Collecting setproctitle\n",
- " Using cached setproctitle-1.3.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
- "Collecting protobuf!=4.24.0,>=3.19.6\n",
- " Using cached protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl (319 kB)\n",
- "Requirement already satisfied: sentry-sdk>=2.0.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from wandb>=0.12.9->sample-factory==2.1.3) (2.24.0)\n",
- "Requirement already satisfied: docker-pycreds>=0.4.0 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from wandb>=0.12.9->sample-factory==2.1.3) (0.4.0)\n",
- "Collecting pydantic<3\n",
- " Using cached pydantic-2.11.3-py3-none-any.whl (443 kB)\n",
- "Requirement already satisfied: platformdirs in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from wandb>=0.12.9->sample-factory==2.1.3) (2.6.2)\n",
- "Requirement already satisfied: pytz>=2020.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from pandas->sample-factory==2.1.3) (2025.1)\n",
- "Requirement already satisfied: python-dateutil>=2.8.2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from pandas->sample-factory==2.1.3) (2.9.0.post0)\n",
- "Requirement already satisfied: tzdata>=2022.7 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from pandas->sample-factory==2.1.3) (2025.2)\n",
- "Requirement already satisfied: future in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from pyglet->sample-factory==2.1.3) (1.0.0)\n",
- "Requirement already satisfied: gitdb<5,>=4.0.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gitpython!=3.1.29,>=1.0.0->wandb>=0.12.9->sample-factory==2.1.3) (4.0.12)\n",
- "Requirement already satisfied: zipp>=0.5 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from importlib-metadata>=4.8.0->gymnasium<1.0,>=0.27->sample-factory==2.1.3) (3.21.0)\n",
- "Requirement already satisfied: pyparsing>=2.0.2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from packaging>=20.9->huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (3.2.2)\n",
- "Collecting annotated-types>=0.6.0\n",
- " Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
- "Collecting pydantic-core==2.33.1\n",
- " Using cached pydantic_core-2.33.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
- "Collecting typing-inspection>=0.4.0\n",
- " Using cached typing_inspection-0.4.0-py3-none-any.whl (14 kB)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from requests->huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (1.26.20)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from requests->huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (3.10)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from requests->huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (2025.1.31)\n",
- "Requirement already satisfied: charset-normalizer<4,>=2 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from requests->huggingface-hub<1.0,>=0.10.0->sample-factory==2.1.3) (3.4.1)\n",
- "Requirement already satisfied: MarkupSafe>=2.1.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from werkzeug>=1.0.1->tensorboard>=1.15.0->sample-factory==2.1.3) (3.0.2)\n",
- "Requirement already satisfied: smmap<6,>=3.0.1 in /root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb>=0.12.9->sample-factory==2.1.3) (5.0.2)\n",
- "Building wheels for collected packages: sample-factory\n",
- " Building editable for sample-factory (pyproject.toml) ... \u001b[?25ldone\n",
- "\u001b[?25h Created wheel for sample-factory: filename=sample_factory-2.1.3-0.editable-py3-none-any.whl size=6861 sha256=bfd196fe999d741f949aace4e31569f6c5374a0db42d6e4b7bc19dabb09c4246\n",
- " Stored in directory: /tmp/pip-ephem-wheel-cache-og_0uaih/wheels/02/e5/86/0009c9cf28109be04e5601a64db4b4fb13fad104df8c619a0d\n",
- "Successfully built sample-factory\n",
- "Installing collected packages: typing-inspection, threadpoolctl, setproctitle, pydantic-core, protobuf, numpy, faster-fifo, eval-type-backport, colorlog, annotated-types, tensorboardx, signal-slot-mp, pydantic, gymnasium, wandb, sample-factory\n",
- " Attempting uninstall: protobuf\n",
- " Found existing installation: protobuf 6.30.1\n",
- " Uninstalling protobuf-6.30.1:\n",
- " Successfully uninstalled protobuf-6.30.1\n",
- " Attempting uninstall: numpy\n",
- " Found existing installation: numpy 2.0.2\n",
- " Uninstalling numpy-2.0.2:\n",
- " Successfully uninstalled numpy-2.0.2\n",
- " Attempting uninstall: gymnasium\n",
- " Found existing installation: gymnasium 1.1.1\n",
- " Uninstalling gymnasium-1.1.1:\n",
- " Successfully uninstalled gymnasium-1.1.1\n",
- " Attempting uninstall: wandb\n",
- " Found existing installation: wandb 0.12.1\n",
- " Uninstalling wandb-0.12.1:\n",
- " Successfully uninstalled wandb-0.12.1\n",
- "Successfully installed annotated-types-0.7.0 colorlog-6.9.0 eval-type-backport-0.2.2 faster-fifo-1.5.2 gymnasium-0.29.1 numpy-1.26.4 protobuf-5.29.4 pydantic-2.11.3 pydantic-core-2.33.1 sample-factory-2.1.3 setproctitle-1.3.5 signal-slot-mp-1.0.5 tensorboardx-2.6.2.2 threadpoolctl-3.6.0 typing-inspection-0.4.0 wandb-0.19.9\n",
- "\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
- "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# !pip install vizdoom\n",
"\n",
@@ -433,53 +280,11 @@
},
{
"cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'sample_factory'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[16], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mfunctools\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgo\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcontext\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m global_model_factory\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcfg\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marguments\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m parse_full_cfg, parse_sf_args\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menv_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m register_env\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sample_factory'"
- ]
- }
- ],
- "source": [
- "import functools\n",
- "\n",
- "from sample_factory.algo.utils.context import global_model_factory\n",
- "from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args\n",
- "from sample_factory.envs.env_utils import register_env\n",
- "from sample_factory.train import run_rl\n",
- "\n",
- "from sf_examples.vizdoom.doom.doom_model import make_vizdoom_encoder\n",
- "from sf_examples.vizdoom.doom.doom_params import add_doom_env_args, doom_override_defaults\n",
- "from sf_examples.vizdoom.doom.doom_utils import DOOM_ENVS, make_doom_env_from_spec\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {
"id": "bCgZbeiavcDU"
},
- "outputs": [
- {
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'sample_factory'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[15], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mfunctools\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgo\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcontext\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m global_model_factory\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcfg\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marguments\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m parse_full_cfg, parse_sf_args\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msample_factory\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menv_utils\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m register_env\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sample_factory'"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import functools\n",
"\n",
@@ -504,7 +309,6 @@
"def register_vizdoom_models():\n",
" global_model_factory().register_encoder_factory(make_vizdoom_encoder)\n",
"\n",
- "\n",
"def register_vizdoom_components():\n",
" register_vizdoom_envs()\n",
" register_vizdoom_models()\n",
@@ -567,7 +371,44457 @@
"metadata": {
"id": "y_TeicMvyKHP"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33m[2025-04-17 08:21:03,924][24592] Environment doom_basic already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,925][24592] Environment doom_two_colors_easy already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,926][24592] Environment doom_two_colors_hard already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,926][24592] Environment doom_dm already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,927][24592] Environment doom_dwango5 already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,927][24592] Environment doom_my_way_home_flat_actions already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,928][24592] Environment doom_defend_the_center_flat_actions already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,928][24592] Environment doom_my_way_home already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,929][24592] Environment doom_deadly_corridor already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,929][24592] Environment doom_defend_the_center already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,930][24592] Environment doom_defend_the_line already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,931][24592] Environment doom_health_gathering already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,932][24592] Environment doom_health_gathering_supreme already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,932][24592] Environment doom_battle already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,933][24592] Environment doom_battle2 already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,933][24592] Environment doom_duel_bots already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,934][24592] Environment doom_deathmatch_bots already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,934][24592] Environment doom_duel already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,935][24592] Environment doom_deathmatch_full already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,936][24592] Environment doom_benchmark already registered, overwriting...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:03,936][24592] register_encoder_factory: \u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:03,963][24592] Loading existing experiment configuration from /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/config.json\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:03,964][24592] Overriding arg 'train_for_env_steps' with value 10000000000 passed from command line\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:03,972][24592] Experiment dir /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment already exists!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:03,973][24592] Resuming existing experiment from /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:03,973][24592] Weights and Biases integration disabled\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:03,975][24592] Environment var CUDA_VISIBLE_DEVICES is 0\n",
+ "\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:09,822][626737] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:09,826][626737] Env info: EnvInfo(obs_space=Dict('obs': Box(0, 255, (3, 72, 128), uint8)), action_space=Discrete(5), num_agents=1, gpu_actions=False, gpu_observations=True, action_splits=None, all_discrete=None, frameskip=4, reward_shaping_scheme=None, env_info_protocol_version=1)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,850][24592] Starting experiment with the following configuration:\n",
+ "help=False\n",
+ "algo=APPO\n",
+ "env=doom_health_gathering_supreme\n",
+ "experiment=default_experiment\n",
+ "train_dir=/root/Lab/ppo-implementation-details/unit8.2/train_dir\n",
+ "restart_behavior=resume\n",
+ "device=gpu\n",
+ "seed=6666\n",
+ "num_policies=1\n",
+ "async_rl=True\n",
+ "serial_mode=False\n",
+ "batched_sampling=False\n",
+ "num_batches_to_accumulate=2\n",
+ "worker_num_splits=2\n",
+ "policy_workers_per_policy=1\n",
+ "max_policy_lag=1000\n",
+ "num_workers=20\n",
+ "num_envs_per_worker=12\n",
+ "batch_size=2048\n",
+ "num_batches_per_epoch=1\n",
+ "num_epochs=1\n",
+ "rollout=32\n",
+ "recurrence=32\n",
+ "shuffle_minibatches=False\n",
+ "gamma=0.99\n",
+ "reward_scale=1.0\n",
+ "reward_clip=1000.0\n",
+ "value_bootstrap=False\n",
+ "normalize_returns=True\n",
+ "exploration_loss_coeff=0.001\n",
+ "value_loss_coeff=0.5\n",
+ "kl_loss_coeff=0.0\n",
+ "exploration_loss=symmetric_kl\n",
+ "gae_lambda=0.95\n",
+ "ppo_clip_ratio=0.1\n",
+ "ppo_clip_value=0.2\n",
+ "with_vtrace=False\n",
+ "vtrace_rho=1.0\n",
+ "vtrace_c=1.0\n",
+ "optimizer=adam\n",
+ "adam_eps=1e-06\n",
+ "adam_beta1=0.9\n",
+ "adam_beta2=0.999\n",
+ "max_grad_norm=0.0\n",
+ "learning_rate=0.0001\n",
+ "lr_schedule=constant\n",
+ "lr_schedule_kl_threshold=0.008\n",
+ "lr_adaptive_min=1e-06\n",
+ "lr_adaptive_max=0.01\n",
+ "obs_subtract_mean=0.0\n",
+ "obs_scale=255.0\n",
+ "normalize_input=True\n",
+ "normalize_input_keys=None\n",
+ "decorrelate_experience_max_seconds=1\n",
+ "decorrelate_envs_on_one_worker=True\n",
+ "actor_worker_gpus=[]\n",
+ "set_workers_cpu_affinity=True\n",
+ "force_envs_single_thread=False\n",
+ "default_niceness=0\n",
+ "log_to_file=True\n",
+ "experiment_summaries_interval=10\n",
+ "flush_summaries_interval=30\n",
+ "stats_avg=100\n",
+ "summaries_use_frameskip=True\n",
+ "heartbeat_interval=20\n",
+ "heartbeat_reporting_interval=300\n",
+ "train_for_env_steps=10000000000\n",
+ "train_for_seconds=3600000\n",
+ "save_every_sec=120\n",
+ "keep_checkpoints=2\n",
+ "load_checkpoint_kind=latest\n",
+ "save_milestones_sec=-1\n",
+ "save_best_every_sec=5\n",
+ "save_best_metric=reward\n",
+ "save_best_after=100000\n",
+ "benchmark=False\n",
+ "encoder_mlp_layers=[512, 512]\n",
+ "encoder_conv_architecture=convnet_simple\n",
+ "encoder_conv_mlp_layers=[512]\n",
+ "use_rnn=True\n",
+ "rnn_size=512\n",
+ "rnn_type=lstm\n",
+ "rnn_num_layers=1\n",
+ "decoder_mlp_layers=[]\n",
+ "nonlinearity=relu\n",
+ "policy_initialization=orthogonal\n",
+ "policy_init_gain=1.0\n",
+ "actor_critic_share_weights=True\n",
+ "adaptive_stddev=True\n",
+ "continuous_tanh_scale=0.0\n",
+ "initial_stddev=1.0\n",
+ "use_env_info_cache=False\n",
+ "env_gpu_actions=False\n",
+ "env_gpu_observations=True\n",
+ "env_frameskip=4\n",
+ "env_framestack=1\n",
+ "pixel_format=CHW\n",
+ "use_record_episode_statistics=False\n",
+ "episode_counter=False\n",
+ "with_wandb=False\n",
+ "wandb_user=None\n",
+ "wandb_project=sample_factory\n",
+ "wandb_group=None\n",
+ "wandb_job_type=SF\n",
+ "wandb_tags=[]\n",
+ "wandb_dir=/root/Lab/ppo-implementation-details/unit8.2/wandb\n",
+ "with_pbt=False\n",
+ "pbt_mix_policies_in_one_env=True\n",
+ "pbt_period_env_steps=5000000\n",
+ "pbt_start_mutation=20000000\n",
+ "pbt_replace_fraction=0.3\n",
+ "pbt_mutation_rate=0.15\n",
+ "pbt_replace_reward_gap=0.1\n",
+ "pbt_replace_reward_gap_absolute=1e-06\n",
+ "pbt_optimize_gamma=False\n",
+ "pbt_target_objective=true_objective\n",
+ "pbt_perturb_min=1.1\n",
+ "pbt_perturb_max=1.5\n",
+ "num_agents=-1\n",
+ "num_humans=0\n",
+ "num_bots=-1\n",
+ "start_bot_difficulty=None\n",
+ "timelimit=None\n",
+ "res_w=128\n",
+ "res_h=72\n",
+ "wide_aspect_ratio=False\n",
+ "eval_env_frameskip=1\n",
+ "fps=35\n",
+ "command_line=--env=doom_health_gathering_supreme --num_workers=20 --num_envs_per_worker=12 --train_for_env_steps=1000000000 --train_for_seconds=3600000 --algo=APPO --gamma=0.99 --use_rnn=True --num_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --benchmark=False --max_grad_norm=0.0 --decorrelate_experience_max_seconds=1 --nonlinearity=relu --rnn_type=lstm --num_policies=1 --heartbeat_reporting_interval=300 --seed=6666\n",
+ "cli_args={'algo': 'APPO', 'env': 'doom_health_gathering_supreme', 'seed': 6666, 'num_policies': 1, 'num_workers': 20, 'num_envs_per_worker': 12, 'batch_size': 2048, 'num_epochs': 1, 'rollout': 32, 'recurrence': 32, 'gamma': 0.99, 'max_grad_norm': 0.0, 'decorrelate_experience_max_seconds': 1, 'heartbeat_reporting_interval': 300, 'train_for_env_steps': 1000000000, 'train_for_seconds': 3600000, 'benchmark': False, 'use_rnn': True, 'rnn_type': 'lstm', 'nonlinearity': 'relu'}\n",
+ "git_hash=077935e57d805e2e773a804597efa3bbd66c95ce\n",
+ "git_repo_name=git@github.com:qdrk/ppo-implementation-details.git\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,852][24592] Saving configuration to /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/config.json...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,896][24592] Rollout worker 0 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,897][24592] Rollout worker 1 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,898][24592] Rollout worker 2 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,898][24592] Rollout worker 3 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,899][24592] Rollout worker 4 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,899][24592] Rollout worker 5 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,900][24592] Rollout worker 6 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,900][24592] Rollout worker 7 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,901][24592] Rollout worker 8 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,901][24592] Rollout worker 9 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,902][24592] Rollout worker 10 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,902][24592] Rollout worker 11 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,903][24592] Rollout worker 12 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,904][24592] Rollout worker 13 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,904][24592] Rollout worker 14 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,905][24592] Rollout worker 15 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,905][24592] Rollout worker 16 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,906][24592] Rollout worker 17 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,907][24592] Rollout worker 18 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:10,907][24592] Rollout worker 19 uses device cpu\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,052][24592] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:11,054][24592] InferenceWorker_p0-w0: min num requests: 6\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,196][24592] Starting all processes...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,196][24592] Starting process learner_proc0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,391][24592] Starting all processes...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,416][24592] Starting process inference_proc0-0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,417][24592] Starting process rollout_proc0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,417][24592] Starting process rollout_proc1\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,417][24592] Starting process rollout_proc2\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,417][24592] Starting process rollout_proc3\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,418][24592] Starting process rollout_proc4\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,418][24592] Starting process rollout_proc5\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,419][24592] Starting process rollout_proc6\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,420][24592] Starting process rollout_proc7\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,420][24592] Starting process rollout_proc8\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,420][24592] Starting process rollout_proc9\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,421][24592] Starting process rollout_proc10\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,421][24592] Starting process rollout_proc11\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,423][24592] Starting process rollout_proc12\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,428][24592] Starting process rollout_proc13\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,446][24592] Starting process rollout_proc14\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,485][24592] Starting process rollout_proc15\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,514][24592] Starting process rollout_proc16\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,516][24592] Starting process rollout_proc17\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,576][24592] Starting process rollout_proc18\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:11,581][24592] Starting process rollout_proc19\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:13,843][626772] LearnerWorker_p0\tpid 626772\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:13,843][626772] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:13,844][626772] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,064][626797] Rollout worker 1 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,064][626797] ROLLOUT worker 1\tpid 626797\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,064][626797] Worker 1 uses CPU cores [1]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,100][626795] InferenceWorker_p0-w0\tpid 626795\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,120][626795] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,120][626795] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,206][626796] Rollout worker 0 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,209][626796] ROLLOUT worker 0\tpid 626796\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,219][626796] Worker 0 uses CPU cores [0]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,253][626808] Rollout worker 5 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,253][626808] ROLLOUT worker 5\tpid 626808\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,259][626808] Worker 5 uses CPU cores [5]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,376][626805] Rollout worker 2 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,376][626805] ROLLOUT worker 2\tpid 626805\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,359][626807] Rollout worker 4 starting...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,379][626805] Worker 2 uses CPU cores [2]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,379][626807] ROLLOUT worker 4\tpid 626807\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,380][626807] Worker 4 uses CPU cores [4]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,479][626772] Num visible devices: 1\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,480][626795] Num visible devices: 1\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,500][626772] Setting fixed seed 6666\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,511][626772] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,511][626772] Initializing actor-critic model on device cuda:0\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,511][626772] RunningMeanStd input shape: (3, 72, 128)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,519][626772] RunningMeanStd input shape: (1,)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,532][626772] ConvEncoder: input_channels=3\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,700][626806] Rollout worker 3 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,700][626806] ROLLOUT worker 3\tpid 626806\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,709][626806] Worker 3 uses CPU cores [3]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,753][626772] Conv encoder output size: 512\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,759][626772] Policy head output size: 512\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,789][626772] Created Actor Critic model with architecture:\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,789][626772] ActorCriticSharedWeights(\n",
+ " (obs_normalizer): ObservationNormalizer(\n",
+ " (running_mean_std): RunningMeanStdDictInPlace(\n",
+ " (running_mean_std): ModuleDict(\n",
+ " (obs): RunningMeanStdInPlace()\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)\n",
+ " (encoder): VizdoomEncoder(\n",
+ " (basic_encoder): ConvEncoder(\n",
+ " (enc): RecursiveScriptModule(\n",
+ " original_name=ConvEncoderImpl\n",
+ " (conv_head): RecursiveScriptModule(\n",
+ " original_name=Sequential\n",
+ " (0): RecursiveScriptModule(original_name=Conv2d)\n",
+ " (1): RecursiveScriptModule(original_name=ReLU)\n",
+ " (2): RecursiveScriptModule(original_name=Conv2d)\n",
+ " (3): RecursiveScriptModule(original_name=ReLU)\n",
+ " (4): RecursiveScriptModule(original_name=Conv2d)\n",
+ " (5): RecursiveScriptModule(original_name=ReLU)\n",
+ " )\n",
+ " (mlp_layers): RecursiveScriptModule(\n",
+ " original_name=Sequential\n",
+ " (0): RecursiveScriptModule(original_name=Linear)\n",
+ " (1): RecursiveScriptModule(original_name=ReLU)\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (core): ModelCoreRNN(\n",
+ " (core): LSTM(512, 512)\n",
+ " )\n",
+ " (decoder): MlpDecoder(\n",
+ " (mlp): Identity()\n",
+ " )\n",
+ " (critic_linear): Linear(in_features=512, out_features=1, bias=True)\n",
+ " (action_parameterization): ActionParameterizationDefault(\n",
+ " (distribution_linear): Linear(in_features=512, out_features=5, bias=True)\n",
+ " )\n",
+ ")\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,888][626835] Rollout worker 9 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,900][626835] ROLLOUT worker 9\tpid 626835\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,909][626835] Worker 9 uses CPU cores [1]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,931][626817] Rollout worker 8 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,931][626817] ROLLOUT worker 8\tpid 626817\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,932][626817] Worker 8 uses CPU cores [0]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,954][626819] Rollout worker 11 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,954][626819] ROLLOUT worker 11\tpid 626819\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,959][626819] Worker 11 uses CPU cores [3]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,961][626865] Rollout worker 15 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:14,961][626865] ROLLOUT worker 15\tpid 626865\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:14,970][626865] Worker 15 uses CPU cores [7]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,066][626870] Rollout worker 18 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,067][626870] ROLLOUT worker 18\tpid 626870\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,069][626870] Worker 18 uses CPU cores [4, 5]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,115][626818] Rollout worker 7 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,116][626818] ROLLOUT worker 7\tpid 626818\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,119][626818] Worker 7 uses CPU cores [7]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,141][626772] Using optimizer \u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,170][626850] Rollout worker 13 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,170][626850] ROLLOUT worker 13\tpid 626850\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,170][626869] Rollout worker 12 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,170][626869] ROLLOUT worker 12\tpid 626869\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,170][626869] Worker 12 uses CPU cores [4]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,171][626850] Worker 13 uses CPU cores [5]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,178][626866] Rollout worker 16 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,178][626866] ROLLOUT worker 16\tpid 626866\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,180][626866] Worker 16 uses CPU cores [0, 1]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,182][626849] Rollout worker 10 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,182][626849] ROLLOUT worker 10\tpid 626849\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,183][626871] Rollout worker 19 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,183][626871] ROLLOUT worker 19\tpid 626871\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,183][626849] Worker 10 uses CPU cores [2]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,184][626871] Worker 19 uses CPU cores [6, 7]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,204][626816] Rollout worker 6 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,204][626816] ROLLOUT worker 6\tpid 626816\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,205][626816] Worker 6 uses CPU cores [6]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,210][626868] Rollout worker 17 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,210][626868] ROLLOUT worker 17\tpid 626868\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,211][626868] Worker 17 uses CPU cores [2, 3]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,243][626867] Rollout worker 14 starting...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:15,243][626867] ROLLOUT worker 14\tpid 626867\tparent 24592\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:15,243][626867] Worker 14 uses CPU cores [6]\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:21:17,427][626772] Loading state from checkpoint /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122072_1000013824.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,447][626772] Loading model from checkpoint\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,449][626772] Loaded experiment state at self.train_step=122072, self.env_steps=1000013824\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,449][626772] Initialized policy 0 weights for model version 122072\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,451][626772] LearnerWorker_p0 finished initialization!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,451][626772] Using GPUs [0] for process 0 (actually maps to GPUs [0])\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,523][626795] RunningMeanStd input shape: (3, 72, 128)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,523][626795] RunningMeanStd input shape: (1,)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,529][626795] ConvEncoder: input_channels=3\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,571][626795] Conv encoder output size: 512\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,571][626795] Policy head output size: 512\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,593][24592] Inference worker 0-0 is ready!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,594][24592] All inference workers are ready! Signal rollout workers to start!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,626][626819] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,629][626797] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,632][626867] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,634][626816] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,635][626806] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,635][626835] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,638][626817] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,642][626850] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,643][626796] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,643][626869] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:17,646][626865] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,647][626808] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,647][626866] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,648][626805] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,651][626870] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,653][626807] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,655][626849] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,659][626868] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,668][626818] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:21:17,704][626871] Doom resolution: 160x120, resize resolution: (128, 72)\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,904][626816] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,965][626796] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,998][626819] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,999][626835] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:17,999][626806] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,000][626797] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,062][626849] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,082][626818] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,085][626865] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,117][626816] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,126][626805] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,131][626796] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,171][626850] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,226][626866] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,232][626835] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,247][626807] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,254][626849] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,284][626816] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,336][626817] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,353][626819] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,358][626818] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,366][626868] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,368][626796] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,408][626805] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,409][626807] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,423][626849] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,485][626868] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,494][626850] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,534][626818] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,534][626807] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,606][626797] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,609][626835] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,624][626796] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,625][626867] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,634][626866] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,639][626816] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,664][626818] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,704][626817] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,725][626849] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,737][626808] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,750][626819] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,776][626807] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,837][626817] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,851][626867] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,859][626808] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,883][626819] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,903][626868] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,912][626818] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,912][626849] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,937][626797] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:18,974][626865] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:18,975][24592] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 1000013824. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,032][626866] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,037][626867] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,056][626808] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,077][626797] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,079][626818] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,088][626870] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,168][626849] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,184][626819] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,212][626870] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,231][626866] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,245][626869] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,254][626807] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,314][626796] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,364][626866] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,374][626817] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,387][626867] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,419][626871] Decorrelating experience for 0 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,448][626805] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,506][626868] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,508][626807] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,517][626866] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,561][626865] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,584][626818] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,598][626796] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,635][626817] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,654][626870] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,656][626808] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,717][626869] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,738][626867] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,742][626818] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,766][626866] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,830][626835] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,847][626869] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,889][626849] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,891][626867] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,909][626819] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,925][626870] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,931][626805] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,993][626850] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:19,995][626866] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,085][626817] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,102][626816] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,136][626818] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,146][626806] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,147][626870] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,172][626866] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,191][626865] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,206][626849] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,210][626805] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,276][626806] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,328][626807] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,379][626869] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,404][626850] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,407][626796] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,444][626818] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,449][626817] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,471][626808] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,487][626865] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,528][626866] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,586][626869] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,598][626819] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,610][626850] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,621][626849] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,625][626796] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,631][626835] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,747][626818] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,778][626816] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,789][626868] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,818][626796] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,830][626867] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,862][626869] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,873][626866] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,910][626807] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,951][626805] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:20,972][626797] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,027][626796] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,041][626850] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,047][626867] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,115][626868] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,117][626835] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,123][626871] Decorrelating experience for 32 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,153][626818] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,165][626870] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,200][626865] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,325][626797] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,331][626868] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,333][626869] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,351][626819] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,367][626806] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,381][626807] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,460][626865] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,470][626850] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,480][626796] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,543][626805] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,570][626869] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,590][626871] Decorrelating experience for 64 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,605][626816] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,688][626806] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,717][626796] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,735][626850] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,748][626835] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,766][626819] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,774][626867] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,831][626808] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,870][626816] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,928][626865] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:21,937][626868] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,009][626869] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,023][626850] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,038][626806] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,045][626867] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,055][626849] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,080][626819] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,089][626807] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,104][626835] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,248][626867] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,263][626805] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,276][626868] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,281][626797] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,309][626865] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,311][626871] Decorrelating experience for 96 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,329][626850] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,331][626808] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,441][626866] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,447][626816] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,466][626797] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,471][626805] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,509][626868] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,549][626871] Decorrelating experience for 128 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,598][626808] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,610][626870] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,668][626867] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,692][626805] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,741][626806] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,776][626869] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,803][626797] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,864][626850] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,885][626865] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,887][626835] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,901][626868] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,974][626870] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:22,995][626819] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,015][626808] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,085][626797] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,087][626869] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,090][626818] Worker 7, sleep for 0.350 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,092][626816] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,159][626865] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,160][626866] Worker 16, sleep for 0.800 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,175][626868] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,234][626850] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,255][626806] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,303][626869] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,333][626817] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,344][626867] Worker 14, sleep for 0.700 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,349][626797] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,365][626819] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,369][626816] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,442][626818] Worker 7 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,509][626870] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,562][626835] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,590][626805] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,592][626865] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,607][626806] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,630][626807] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,665][626871] Decorrelating experience for 160 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,667][626817] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,735][626849] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,861][626797] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,870][626869] Worker 12, sleep for 0.600 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,872][626850] Worker 13, sleep for 0.650 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,877][626808] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,887][626870] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,895][626868] Worker 17, sleep for 0.850 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,908][626816] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:23,963][626866] Worker 16 awakens!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:23,976][24592] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 1000013824. Throughput: 0: 555.6. Samples: 2778. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:23,980][24592] Avg episode reward: [(0, '1.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,024][626807] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,027][626835] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,047][626867] Worker 14 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,053][626817] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,113][626819] Worker 11, sleep for 0.550 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,202][626805] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,208][626849] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,245][626808] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,318][626870] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,423][626865] Worker 15, sleep for 0.750 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,428][626871] Decorrelating experience for 192 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,448][626808] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,449][626817] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,473][626869] Worker 12 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,525][626850] Worker 13 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,606][626797] Worker 1, sleep for 0.050 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,615][626835] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:24,632][626772] Signal inference workers to stop experience collection...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:24,637][626795] InferenceWorker_p0-w0: stopping experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,657][626797] Worker 1 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,660][626816] Worker 6, sleep for 0.300 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,669][626819] Worker 11 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,696][626870] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,723][626817] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,749][626868] Worker 17 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,773][626806] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,838][626871] Decorrelating experience for 224 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,945][626806] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:24,962][626816] Worker 6 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,010][626871] Decorrelating experience for 256 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,108][626806] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,176][626865] Worker 15 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,189][626871] Decorrelating experience for 288 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,367][626871] Decorrelating experience for 320 frames...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:25,556][626871] Decorrelating experience for 352 frames...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:26,065][626772] Signal inference workers to resume experience collection...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:26,065][626795] InferenceWorker_p0-w0: resuming experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,168][626807] Worker 4, sleep for 0.200 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,370][626807] Worker 4 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,623][626805] Worker 2, sleep for 0.100 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,627][626849] Worker 10, sleep for 0.500 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,729][626805] Worker 2 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,789][626808] Worker 5, sleep for 0.250 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,892][626806] Worker 3, sleep for 0.150 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,907][626817] Worker 8, sleep for 0.400 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:26,939][626835] Worker 9, sleep for 0.450 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,035][626870] Worker 18, sleep for 0.900 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,044][626806] Worker 3 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,044][626808] Worker 5 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,071][626871] Worker 19, sleep for 0.950 sec to decorrelate experience collection\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,129][626849] Worker 10 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,312][626817] Worker 8 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,392][626835] Worker 9 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,781][626795] Updated weights for policy 0, policy_version 122082 (0.0052)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:27,939][626870] Worker 18 awakens!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:28,025][626871] Worker 19 awakens!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:28,976][24592] Fps is (10 sec: 13106.8, 60 sec: 13106.8, 300 sec: 13106.8). Total num frames: 1000144896. Throughput: 0: 1107.6. Samples: 11076. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:28,977][24592] Avg episode reward: [(0, '4.197')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:29,569][626795] Updated weights for policy 0, policy_version 122092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,050][24592] Heartbeat connected on Batcher_0\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,051][24592] Heartbeat connected on LearnerWorker_p0\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,056][24592] Heartbeat connected on InferenceWorker_p0-w0\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,067][24592] Heartbeat connected on RolloutWorker_w1\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,070][24592] Heartbeat connected on RolloutWorker_w7\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,071][24592] Heartbeat connected on RolloutWorker_w5\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,072][24592] Heartbeat connected on RolloutWorker_w4\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,072][24592] Heartbeat connected on RolloutWorker_w2\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,076][24592] Heartbeat connected on RolloutWorker_w0\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,079][24592] Heartbeat connected on RolloutWorker_w12\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,082][24592] Heartbeat connected on RolloutWorker_w15\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,089][24592] Heartbeat connected on RolloutWorker_w9\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,091][24592] Heartbeat connected on RolloutWorker_w10\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,093][24592] Heartbeat connected on RolloutWorker_w6\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,098][24592] Heartbeat connected on RolloutWorker_w8\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,100][24592] Heartbeat connected on RolloutWorker_w13\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,111][24592] Heartbeat connected on RolloutWorker_w11\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,113][24592] Heartbeat connected on RolloutWorker_w3\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,119][24592] Heartbeat connected on RolloutWorker_w14\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,194][24592] Heartbeat connected on RolloutWorker_w16\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,208][24592] Heartbeat connected on RolloutWorker_w19\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,212][24592] Heartbeat connected on RolloutWorker_w18\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,214][24592] Heartbeat connected on RolloutWorker_w17\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:31,356][626795] Updated weights for policy 0, policy_version 122102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:32,919][626795] Updated weights for policy 0, policy_version 122112 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:33,976][24592] Fps is (10 sec: 37682.8, 60 sec: 25121.5, 300 sec: 25121.5). Total num frames: 1000390656. Throughput: 0: 5505.9. Samples: 82590. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:33,977][24592] Avg episode reward: [(0, '4.246')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:37,272][626795] Updated weights for policy 0, policy_version 122122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:38,880][626795] Updated weights for policy 0, policy_version 122132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:38,975][24592] Fps is (10 sec: 36045.6, 60 sec: 24575.9, 300 sec: 24575.9). Total num frames: 1000505344. Throughput: 0: 6181.2. Samples: 123624. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:38,977][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:40,623][626795] Updated weights for policy 0, policy_version 122142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:42,246][626795] Updated weights for policy 0, policy_version 122152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:43,873][626795] Updated weights for policy 0, policy_version 122162 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:43,975][24592] Fps is (10 sec: 36046.1, 60 sec: 29491.2, 300 sec: 29491.2). Total num frames: 1000751104. Throughput: 0: 6429.1. Samples: 160728. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:43,976][24592] Avg episode reward: [(0, '4.222')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:45,639][626795] Updated weights for policy 0, policy_version 122172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:47,120][626795] Updated weights for policy 0, policy_version 122182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:48,830][626795] Updated weights for policy 0, policy_version 122192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:48,976][24592] Fps is (10 sec: 49970.4, 60 sec: 33040.8, 300 sec: 33040.8). Total num frames: 1001005056. Throughput: 0: 7840.1. Samples: 235206. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:48,976][24592] Avg episode reward: [(0, '4.291')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:50,498][626795] Updated weights for policy 0, policy_version 122202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:51,959][626795] Updated weights for policy 0, policy_version 122212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:53,502][626795] Updated weights for policy 0, policy_version 122222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:53,975][24592] Fps is (10 sec: 50790.5, 60 sec: 35576.7, 300 sec: 35576.7). Total num frames: 1001259008. Throughput: 0: 8940.3. Samples: 312912. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:53,977][24592] Avg episode reward: [(0, '4.365')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:55,150][626795] Updated weights for policy 0, policy_version 122232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:56,653][626795] Updated weights for policy 0, policy_version 122242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:21:58,335][626795] Updated weights for policy 0, policy_version 122252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:58,975][24592] Fps is (10 sec: 50791.3, 60 sec: 37478.3, 300 sec: 37478.3). Total num frames: 1001512960. Throughput: 0: 8788.3. Samples: 351534. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:21:58,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:00,070][626795] Updated weights for policy 0, policy_version 122262 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:01,704][626795] Updated weights for policy 0, policy_version 122272 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:03,228][626795] Updated weights for policy 0, policy_version 122282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:03,975][24592] Fps is (10 sec: 50790.3, 60 sec: 38957.5, 300 sec: 38957.5). Total num frames: 1001766912. Throughput: 0: 9471.7. Samples: 426228. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:03,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:04,929][626795] Updated weights for policy 0, policy_version 122292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:06,513][626795] Updated weights for policy 0, policy_version 122302 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:08,284][626795] Updated weights for policy 0, policy_version 122312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:08,975][24592] Fps is (10 sec: 49971.1, 60 sec: 39976.9, 300 sec: 39976.9). Total num frames: 1002012672. Throughput: 0: 11060.3. Samples: 500490. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:08,977][24592] Avg episode reward: [(0, '4.239')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:12,660][626795] Updated weights for policy 0, policy_version 122322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:13,975][24592] Fps is (10 sec: 35225.3, 60 sec: 38278.9, 300 sec: 38278.9). Total num frames: 1002119168. Throughput: 0: 10984.7. Samples: 505386. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:13,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:14,397][626795] Updated weights for policy 0, policy_version 122332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:16,038][626795] Updated weights for policy 0, policy_version 122342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:17,777][626795] Updated weights for policy 0, policy_version 122352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:18,975][24592] Fps is (10 sec: 34406.6, 60 sec: 39048.5, 300 sec: 39048.5). Total num frames: 1002356736. Throughput: 0: 10960.4. Samples: 575802. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:18,976][24592] Avg episode reward: [(0, '4.295')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:19,572][626795] Updated weights for policy 0, policy_version 122362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:21,173][626795] Updated weights for policy 0, policy_version 122372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:22,852][626795] Updated weights for policy 0, policy_version 122382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:23,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42871.6, 300 sec: 39573.6). Total num frames: 1002586112. Throughput: 0: 11572.4. Samples: 644382. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:23,976][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:24,876][626795] Updated weights for policy 0, policy_version 122392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:26,459][626795] Updated weights for policy 0, policy_version 122402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:28,021][626795] Updated weights for policy 0, policy_version 122412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:28,976][24592] Fps is (10 sec: 48332.7, 60 sec: 44919.6, 300 sec: 40374.8). Total num frames: 1002840064. Throughput: 0: 11589.5. Samples: 682254. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:28,977][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:29,607][626795] Updated weights for policy 0, policy_version 122422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:31,139][626795] Updated weights for policy 0, policy_version 122432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:32,795][626795] Updated weights for policy 0, policy_version 122442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:33,975][24592] Fps is (10 sec: 52429.0, 60 sec: 45329.4, 300 sec: 41287.7). Total num frames: 1003110400. Throughput: 0: 11664.5. Samples: 760104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:33,979][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:34,333][626795] Updated weights for policy 0, policy_version 122452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:35,958][626795] Updated weights for policy 0, policy_version 122462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:37,558][626795] Updated weights for policy 0, policy_version 122472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:38,975][24592] Fps is (10 sec: 51610.0, 60 sec: 47513.7, 300 sec: 41779.2). Total num frames: 1003356160. Throughput: 0: 11646.1. Samples: 836988. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:38,976][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:39,209][626795] Updated weights for policy 0, policy_version 122482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:40,849][626795] Updated weights for policy 0, policy_version 122492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:42,567][626795] Updated weights for policy 0, policy_version 122502 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:43,975][24592] Fps is (10 sec: 49970.6, 60 sec: 47650.1, 300 sec: 42309.2). Total num frames: 1003610112. Throughput: 0: 11602.1. Samples: 873630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:43,977][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:47,004][626795] Updated weights for policy 0, policy_version 122512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:48,631][626795] Updated weights for policy 0, policy_version 122522 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:48,975][24592] Fps is (10 sec: 35225.4, 60 sec: 45056.1, 300 sec: 41051.0). Total num frames: 1003708416. Throughput: 0: 10839.1. Samples: 913986. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:48,978][24592] Avg episode reward: [(0, '4.321')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:50,335][626795] Updated weights for policy 0, policy_version 122532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:52,055][626795] Updated weights for policy 0, policy_version 122542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:53,840][626795] Updated weights for policy 0, policy_version 122552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:53,975][24592] Fps is (10 sec: 34406.6, 60 sec: 44919.4, 300 sec: 41477.4). Total num frames: 1003954176. Throughput: 0: 10780.8. Samples: 985626. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:53,978][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:55,406][626795] Updated weights for policy 0, policy_version 122562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:56,981][626795] Updated weights for policy 0, policy_version 122572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:22:58,617][626795] Updated weights for policy 0, policy_version 122582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:58,975][24592] Fps is (10 sec: 49971.4, 60 sec: 44919.5, 300 sec: 41943.0). Total num frames: 1004208128. Throughput: 0: 11509.2. Samples: 1023300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:22:58,977][24592] Avg episode reward: [(0, '4.311')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:00,291][626795] Updated weights for policy 0, policy_version 122592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:01,869][626795] Updated weights for policy 0, policy_version 122602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:03,475][626795] Updated weights for policy 0, policy_version 122612 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:03,975][24592] Fps is (10 sec: 50790.2, 60 sec: 44919.4, 300 sec: 42364.3). Total num frames: 1004462080. Throughput: 0: 11640.6. Samples: 1099632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:03,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122615_1004462080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000121927_998825984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:05,116][626795] Updated weights for policy 0, policy_version 122622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:06,768][626795] Updated weights for policy 0, policy_version 122632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:08,434][626795] Updated weights for policy 0, policy_version 122642 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:08,976][24592] Fps is (10 sec: 49969.2, 60 sec: 44919.2, 300 sec: 42672.7). Total num frames: 1004707840. Throughput: 0: 11788.3. Samples: 1174860. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:08,976][24592] Avg episode reward: [(0, '4.214')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:10,027][626795] Updated weights for policy 0, policy_version 122652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:11,575][626795] Updated weights for policy 0, policy_version 122662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:13,235][626795] Updated weights for policy 0, policy_version 122672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:13,975][24592] Fps is (10 sec: 49971.6, 60 sec: 47377.2, 300 sec: 43025.8). Total num frames: 1004961792. Throughput: 0: 11796.7. Samples: 1213104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:13,977][24592] Avg episode reward: [(0, '4.369')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:14,910][626795] Updated weights for policy 0, policy_version 122682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:16,619][626795] Updated weights for policy 0, policy_version 122692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:18,234][626795] Updated weights for policy 0, policy_version 122702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:18,977][24592] Fps is (10 sec: 49965.0, 60 sec: 47512.3, 300 sec: 43280.5). Total num frames: 1005207552. Throughput: 0: 11705.3. Samples: 1286862. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:18,979][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:22,713][626795] Updated weights for policy 0, policy_version 122712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:23,975][24592] Fps is (10 sec: 35225.5, 60 sec: 45465.6, 300 sec: 42401.8). Total num frames: 1005314048. Throughput: 0: 10877.6. Samples: 1326480. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:23,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:24,466][626795] Updated weights for policy 0, policy_version 122722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:26,252][626795] Updated weights for policy 0, policy_version 122732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:27,932][626795] Updated weights for policy 0, policy_version 122742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:28,975][24592] Fps is (10 sec: 33592.6, 60 sec: 45056.0, 300 sec: 42535.4). Total num frames: 1005543424. Throughput: 0: 10813.5. Samples: 1360236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:28,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:29,709][626795] Updated weights for policy 0, policy_version 122752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:31,337][626795] Updated weights for policy 0, policy_version 122762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:32,990][626795] Updated weights for policy 0, policy_version 122772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:33,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44646.4, 300 sec: 42780.4). Total num frames: 1005789184. Throughput: 0: 11530.9. Samples: 1432878. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:33,976][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:34,741][626795] Updated weights for policy 0, policy_version 122782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:36,299][626795] Updated weights for policy 0, policy_version 122792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:37,966][626795] Updated weights for policy 0, policy_version 122802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:38,975][24592] Fps is (10 sec: 49151.6, 60 sec: 44646.3, 300 sec: 43008.0). Total num frames: 1006034944. Throughput: 0: 11590.0. Samples: 1507176. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:38,977][24592] Avg episode reward: [(0, '4.248')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:39,616][626795] Updated weights for policy 0, policy_version 122812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:41,285][626795] Updated weights for policy 0, policy_version 122822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:42,808][626795] Updated weights for policy 0, policy_version 122832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:43,975][24592] Fps is (10 sec: 50790.5, 60 sec: 44783.0, 300 sec: 43332.8). Total num frames: 1006297088. Throughput: 0: 11596.7. Samples: 1545150. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:43,976][24592] Avg episode reward: [(0, '4.323')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:44,471][626795] Updated weights for policy 0, policy_version 122842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:46,197][626795] Updated weights for policy 0, policy_version 122852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:47,803][626795] Updated weights for policy 0, policy_version 122862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:48,975][24592] Fps is (10 sec: 50790.8, 60 sec: 47240.5, 300 sec: 43526.8). Total num frames: 1006542848. Throughput: 0: 11577.9. Samples: 1620636. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:48,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:49,511][626795] Updated weights for policy 0, policy_version 122872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:51,170][626795] Updated weights for policy 0, policy_version 122882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:52,792][626795] Updated weights for policy 0, policy_version 122892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:53,976][24592] Fps is (10 sec: 48331.3, 60 sec: 47103.8, 300 sec: 43655.3). Total num frames: 1006780416. Throughput: 0: 11527.4. Samples: 1693590. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:53,977][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:57,088][626795] Updated weights for policy 0, policy_version 122902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:23:58,638][626795] Updated weights for policy 0, policy_version 122912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:58,975][24592] Fps is (10 sec: 35225.9, 60 sec: 44782.9, 300 sec: 43008.0). Total num frames: 1006895104. Throughput: 0: 10812.3. Samples: 1699656. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:23:58,978][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:00,547][626795] Updated weights for policy 0, policy_version 122922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:02,289][626795] Updated weights for policy 0, policy_version 122932 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:03,975][24592] Fps is (10 sec: 35226.5, 60 sec: 44509.9, 300 sec: 43144.5). Total num frames: 1007132672. Throughput: 0: 10743.4. Samples: 1770300. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:03,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:04,035][626795] Updated weights for policy 0, policy_version 122942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:05,544][626795] Updated weights for policy 0, policy_version 122952 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:07,326][626795] Updated weights for policy 0, policy_version 122962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:08,910][626795] Updated weights for policy 0, policy_version 122972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:08,975][24592] Fps is (10 sec: 49152.0, 60 sec: 44646.7, 300 sec: 43369.4). Total num frames: 1007386624. Throughput: 0: 11500.0. Samples: 1843980. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:08,978][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:10,597][626795] Updated weights for policy 0, policy_version 122982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:12,176][626795] Updated weights for policy 0, policy_version 122992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:13,852][626795] Updated weights for policy 0, policy_version 123002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:13,975][24592] Fps is (10 sec: 50790.3, 60 sec: 44646.4, 300 sec: 43581.4). Total num frames: 1007640576. Throughput: 0: 11591.5. Samples: 1881852. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:13,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:15,411][626795] Updated weights for policy 0, policy_version 123012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:17,119][626795] Updated weights for policy 0, policy_version 123022 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:18,746][626795] Updated weights for policy 0, policy_version 123032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:18,975][24592] Fps is (10 sec: 49970.6, 60 sec: 44647.6, 300 sec: 43736.2). Total num frames: 1007886336. Throughput: 0: 11633.7. Samples: 1956396. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:18,978][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:20,377][626795] Updated weights for policy 0, policy_version 123042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:22,028][626795] Updated weights for policy 0, policy_version 123052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:23,844][626795] Updated weights for policy 0, policy_version 123062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:23,975][24592] Fps is (10 sec: 49152.3, 60 sec: 46967.5, 300 sec: 43882.5). Total num frames: 1008132096. Throughput: 0: 11614.2. Samples: 2029812. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:23,976][24592] Avg episode reward: [(0, '4.284')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:25,589][626795] Updated weights for policy 0, policy_version 123072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:27,271][626795] Updated weights for policy 0, policy_version 123082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:31,612][24592] Fps is (10 sec: 37599.1, 60 sec: 44990.2, 300 sec: 43333.6). Total num frames: 1008361472. Throughput: 0: 10939.9. Samples: 2066292. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:31,614][24592] Avg episode reward: [(0, '4.217')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:31,700][626795] Updated weights for policy 0, policy_version 123092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:33,647][626795] Updated weights for policy 0, policy_version 123102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:33,975][24592] Fps is (10 sec: 33587.2, 60 sec: 44646.4, 300 sec: 43354.6). Total num frames: 1008467968. Throughput: 0: 10710.3. Samples: 2102598. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:33,977][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:35,452][626795] Updated weights for policy 0, policy_version 123112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:37,114][626795] Updated weights for policy 0, policy_version 123122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:38,850][626795] Updated weights for policy 0, policy_version 123132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:38,976][24592] Fps is (10 sec: 45612.4, 60 sec: 44373.0, 300 sec: 43417.5). Total num frames: 1008697344. Throughput: 0: 10655.5. Samples: 2173092. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:38,978][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:40,493][626795] Updated weights for policy 0, policy_version 123142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:42,091][626795] Updated weights for policy 0, policy_version 123152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:43,836][626795] Updated weights for policy 0, policy_version 123162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44100.3, 300 sec: 43557.5). Total num frames: 1008943104. Throughput: 0: 11339.6. Samples: 2209938. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:43,978][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:45,485][626795] Updated weights for policy 0, policy_version 123172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:47,040][626795] Updated weights for policy 0, policy_version 123182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:48,720][626795] Updated weights for policy 0, policy_version 123192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:48,975][24592] Fps is (10 sec: 49974.3, 60 sec: 44236.9, 300 sec: 43729.7). Total num frames: 1009197056. Throughput: 0: 11422.0. Samples: 2284290. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:48,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:50,432][626795] Updated weights for policy 0, policy_version 123202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:52,082][626795] Updated weights for policy 0, policy_version 123212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:53,679][626795] Updated weights for policy 0, policy_version 123222 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:53,975][24592] Fps is (10 sec: 49970.4, 60 sec: 44373.5, 300 sec: 43855.7). Total num frames: 1009442816. Throughput: 0: 11436.8. Samples: 2358636. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:53,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:55,396][626795] Updated weights for policy 0, policy_version 123232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:57,076][626795] Updated weights for policy 0, policy_version 123242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:24:58,868][626795] Updated weights for policy 0, policy_version 123252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:58,975][24592] Fps is (10 sec: 49151.3, 60 sec: 46557.8, 300 sec: 43976.1). Total num frames: 1009688576. Throughput: 0: 11407.1. Samples: 2395170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:24:58,976][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:00,517][626795] Updated weights for policy 0, policy_version 123262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:02,230][626795] Updated weights for policy 0, policy_version 123272 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:06,640][24592] Fps is (10 sec: 37517.1, 60 sec: 44447.4, 300 sec: 43503.1). Total num frames: 1009917952. Throughput: 0: 10716.0. Samples: 2467170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:06,643][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:06,692][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000123282_1009926144.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:06,696][626795] Updated weights for policy 0, policy_version 123282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:06,751][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122072_1000013824.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:08,650][626795] Updated weights for policy 0, policy_version 123292 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:08,975][24592] Fps is (10 sec: 33587.2, 60 sec: 43963.6, 300 sec: 43524.4). Total num frames: 1010024448. Throughput: 0: 10521.3. Samples: 2503272. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:08,977][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:10,495][626795] Updated weights for policy 0, policy_version 123302 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:12,059][626795] Updated weights for policy 0, policy_version 123312 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:13,799][626795] Updated weights for policy 0, policy_version 123322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:13,975][24592] Fps is (10 sec: 46905.0, 60 sec: 43690.7, 300 sec: 43609.3). Total num frames: 1010262016. Throughput: 0: 11143.1. Samples: 2538348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:13,976][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:15,545][626795] Updated weights for policy 0, policy_version 123332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:17,201][626795] Updated weights for policy 0, policy_version 123342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:18,852][626795] Updated weights for policy 0, policy_version 123352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:18,975][24592] Fps is (10 sec: 47513.9, 60 sec: 43554.2, 300 sec: 43690.7). Total num frames: 1010499584. Throughput: 0: 11291.9. Samples: 2610732. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:18,976][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:20,443][626795] Updated weights for policy 0, policy_version 123362 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:22,192][626795] Updated weights for policy 0, policy_version 123372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:23,855][626795] Updated weights for policy 0, policy_version 123382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:23,975][24592] Fps is (10 sec: 48332.2, 60 sec: 43554.0, 300 sec: 43802.1). Total num frames: 1010745344. Throughput: 0: 11368.2. Samples: 2684658. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:23,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:25,561][626795] Updated weights for policy 0, policy_version 123392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:27,130][626795] Updated weights for policy 0, policy_version 123402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:28,855][626795] Updated weights for policy 0, policy_version 123412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:28,975][24592] Fps is (10 sec: 49971.2, 60 sec: 45984.7, 300 sec: 43941.9). Total num frames: 1010999296. Throughput: 0: 11376.5. Samples: 2721882. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:28,976][24592] Avg episode reward: [(0, '4.164')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:30,492][626795] Updated weights for policy 0, policy_version 123422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:32,218][626795] Updated weights for policy 0, policy_version 123432 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:33,804][626795] Updated weights for policy 0, policy_version 123442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:33,975][24592] Fps is (10 sec: 49971.8, 60 sec: 46284.8, 300 sec: 44044.0). Total num frames: 1011245056. Throughput: 0: 11354.0. Samples: 2795220. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:33,976][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:35,629][626795] Updated weights for policy 0, policy_version 123452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:37,333][626795] Updated weights for policy 0, policy_version 123462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:41,661][24592] Fps is (10 sec: 36810.1, 60 sec: 44171.8, 300 sec: 43597.5). Total num frames: 1011466240. Throughput: 0: 10658.9. Samples: 2866908. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:41,662][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:41,854][626795] Updated weights for policy 0, policy_version 123472 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:43,682][626795] Updated weights for policy 0, policy_version 123482 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:43,976][24592] Fps is (10 sec: 32766.8, 60 sec: 43826.9, 300 sec: 43618.5). Total num frames: 1011572736. Throughput: 0: 10559.4. Samples: 2870346. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:43,976][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:45,615][626795] Updated weights for policy 0, policy_version 123492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:47,273][626795] Updated weights for policy 0, policy_version 123502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:48,975][24592] Fps is (10 sec: 45916.7, 60 sec: 43417.5, 300 sec: 43660.3). Total num frames: 1011802112. Throughput: 0: 11096.5. Samples: 2936946. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:48,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:49,031][626795] Updated weights for policy 0, policy_version 123512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:50,724][626795] Updated weights for policy 0, policy_version 123522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:52,476][626795] Updated weights for policy 0, policy_version 123532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:53,976][24592] Fps is (10 sec: 47513.7, 60 sec: 43417.4, 300 sec: 43760.1). Total num frames: 1012047872. Throughput: 0: 11242.1. Samples: 3009168. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:53,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:54,091][626795] Updated weights for policy 0, policy_version 123542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:55,843][626795] Updated weights for policy 0, policy_version 123552 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:57,453][626795] Updated weights for policy 0, policy_version 123562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:58,975][24592] Fps is (10 sec: 49152.3, 60 sec: 43417.7, 300 sec: 43856.5). Total num frames: 1012293632. Throughput: 0: 11271.5. Samples: 3045564. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:25:58,976][24592] Avg episode reward: [(0, '4.391')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:25:59,164][626795] Updated weights for policy 0, policy_version 123572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:00,792][626795] Updated weights for policy 0, policy_version 123582 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:02,467][626795] Updated weights for policy 0, policy_version 123592 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:03,979][24592] Fps is (10 sec: 48319.4, 60 sec: 45575.8, 300 sec: 43920.1). Total num frames: 1012531200. Throughput: 0: 11298.7. Samples: 3119208. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:03,980][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:04,151][626795] Updated weights for policy 0, policy_version 123602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:05,877][626795] Updated weights for policy 0, policy_version 123612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:07,618][626795] Updated weights for policy 0, policy_version 123622 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45875.3, 300 sec: 44010.8). Total num frames: 1012776960. Throughput: 0: 11252.3. Samples: 3191010. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:08,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:09,324][626795] Updated weights for policy 0, policy_version 123632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:10,991][626795] Updated weights for policy 0, policy_version 123642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:12,804][626795] Updated weights for policy 0, policy_version 123652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:16,640][24592] Fps is (10 sec: 37525.7, 60 sec: 43793.7, 300 sec: 43648.1). Total num frames: 1013006336. Throughput: 0: 10586.3. Samples: 3226476. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:16,642][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:17,299][626795] Updated weights for policy 0, policy_version 123662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:18,975][24592] Fps is (10 sec: 32767.7, 60 sec: 43417.5, 300 sec: 44375.7). Total num frames: 1013104640. Throughput: 0: 10424.4. Samples: 3264318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:18,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:19,145][626795] Updated weights for policy 0, policy_version 123672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:20,879][626795] Updated weights for policy 0, policy_version 123682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:22,696][626795] Updated weights for policy 0, policy_version 123692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:23,975][24592] Fps is (10 sec: 45788.8, 60 sec: 43281.2, 300 sec: 44736.7). Total num frames: 1013342208. Throughput: 0: 11009.3. Samples: 3332766. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:23,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:24,430][626795] Updated weights for policy 0, policy_version 123702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:26,217][626795] Updated weights for policy 0, policy_version 123712 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:27,711][626795] Updated weights for policy 0, policy_version 123722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:28,975][24592] Fps is (10 sec: 48333.5, 60 sec: 43144.6, 300 sec: 44736.7). Total num frames: 1013587968. Throughput: 0: 11086.1. Samples: 3369216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:28,978][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:29,494][626795] Updated weights for policy 0, policy_version 123732 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:31,184][626795] Updated weights for policy 0, policy_version 123742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:32,828][626795] Updated weights for policy 0, policy_version 123752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:33,976][24592] Fps is (10 sec: 48329.6, 60 sec: 43007.5, 300 sec: 45153.1). Total num frames: 1013825536. Throughput: 0: 11236.1. Samples: 3442578. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:33,978][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:34,598][626795] Updated weights for policy 0, policy_version 123762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:36,297][626795] Updated weights for policy 0, policy_version 123772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:37,676][626772] Signal inference workers to stop experience collection... (50 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:37,677][626772] Signal inference workers to resume experience collection... (50 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:37,682][626795] InferenceWorker_p0-w0: stopping experience collection (50 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:37,690][626795] InferenceWorker_p0-w0: resuming experience collection (50 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:37,877][626795] Updated weights for policy 0, policy_version 123782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45308.8, 300 sec: 45125.4). Total num frames: 1014063104. Throughput: 0: 11234.1. Samples: 3514698. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:38,977][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:39,661][626795] Updated weights for policy 0, policy_version 123792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:41,289][626795] Updated weights for policy 0, policy_version 123802 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:43,082][626795] Updated weights for policy 0, policy_version 123812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:43,975][24592] Fps is (10 sec: 48336.3, 60 sec: 45602.5, 300 sec: 45097.7). Total num frames: 1014308864. Throughput: 0: 11229.3. Samples: 3550884. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:43,977][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:44,858][626795] Updated weights for policy 0, policy_version 123822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:46,587][626795] Updated weights for policy 0, policy_version 123832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:48,281][626795] Updated weights for policy 0, policy_version 123842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:51,661][24592] Fps is (10 sec: 37455.2, 60 sec: 43648.6, 300 sec: 44608.3). Total num frames: 1014538240. Throughput: 0: 10534.4. Samples: 3621510. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:51,663][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:52,934][626795] Updated weights for policy 0, policy_version 123852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:53,975][24592] Fps is (10 sec: 32767.7, 60 sec: 43144.8, 300 sec: 44486.7). Total num frames: 1014636544. Throughput: 0: 10363.6. Samples: 3657372. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:53,977][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:54,879][626795] Updated weights for policy 0, policy_version 123862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:56,596][626795] Updated weights for policy 0, policy_version 123872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:58,338][626795] Updated weights for policy 0, policy_version 123882 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:58,975][24592] Fps is (10 sec: 44797.6, 60 sec: 42871.4, 300 sec: 44403.4). Total num frames: 1014865920. Throughput: 0: 10972.5. Samples: 3691002. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:26:58,980][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:26:59,988][626795] Updated weights for policy 0, policy_version 123892 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:01,726][626795] Updated weights for policy 0, policy_version 123902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:03,322][626795] Updated weights for policy 0, policy_version 123912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:03,982][24592] Fps is (10 sec: 47481.6, 60 sec: 43005.4, 300 sec: 44402.4). Total num frames: 1015111680. Throughput: 0: 11092.1. Samples: 3763536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:03,983][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:04,019][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000123916_1015119872.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:04,071][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122615_1004462080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:05,038][626795] Updated weights for policy 0, policy_version 123922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:06,737][626795] Updated weights for policy 0, policy_version 123932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:08,432][626795] Updated weights for policy 0, policy_version 123942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:08,975][24592] Fps is (10 sec: 49152.5, 60 sec: 43008.0, 300 sec: 44875.5). Total num frames: 1015357440. Throughput: 0: 11194.0. Samples: 3836496. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:08,976][24592] Avg episode reward: [(0, '4.364')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:10,157][626795] Updated weights for policy 0, policy_version 123952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:11,884][626795] Updated weights for policy 0, policy_version 123962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:13,534][626795] Updated weights for policy 0, policy_version 123972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:13,976][24592] Fps is (10 sec: 48362.8, 60 sec: 45149.3, 300 sec: 44875.4). Total num frames: 1015595008. Throughput: 0: 11192.6. Samples: 3872892. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:13,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:15,167][626795] Updated weights for policy 0, policy_version 123982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:16,903][626795] Updated weights for policy 0, policy_version 123992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:18,660][626795] Updated weights for policy 0, policy_version 124002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:18,975][24592] Fps is (10 sec: 47513.7, 60 sec: 45465.7, 300 sec: 44903.3). Total num frames: 1015832576. Throughput: 0: 11167.2. Samples: 3945096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:18,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:20,359][626795] Updated weights for policy 0, policy_version 124012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:22,123][626795] Updated weights for policy 0, policy_version 124022 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:26,643][24592] Fps is (10 sec: 36862.1, 60 sec: 43399.3, 300 sec: 44418.3). Total num frames: 1016061952. Throughput: 0: 10495.6. Samples: 4015002. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:26,644][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:26,679][626795] Updated weights for policy 0, policy_version 124032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:28,595][626795] Updated weights for policy 0, policy_version 124042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:28,975][24592] Fps is (10 sec: 32767.7, 60 sec: 42871.4, 300 sec: 44236.8). Total num frames: 1016160256. Throughput: 0: 10393.3. Samples: 4018584. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:28,977][24592] Avg episode reward: [(0, '4.289')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:30,435][626795] Updated weights for policy 0, policy_version 124052 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:32,160][626795] Updated weights for policy 0, policy_version 124062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:33,756][626795] Updated weights for policy 0, policy_version 124072 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:33,975][24592] Fps is (10 sec: 46925.5, 60 sec: 43008.5, 300 sec: 44236.8). Total num frames: 1016406016. Throughput: 0: 10968.8. Samples: 4085652. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:33,978][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:35,560][626795] Updated weights for policy 0, policy_version 124082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:37,230][626795] Updated weights for policy 0, policy_version 124092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:38,862][626795] Updated weights for policy 0, policy_version 124102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:38,976][24592] Fps is (10 sec: 48331.7, 60 sec: 43007.8, 300 sec: 44181.2). Total num frames: 1016643584. Throughput: 0: 11135.5. Samples: 4158474. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:38,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:40,544][626795] Updated weights for policy 0, policy_version 124112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:42,249][626795] Updated weights for policy 0, policy_version 124122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:43,919][626795] Updated weights for policy 0, policy_version 124132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:43,975][24592] Fps is (10 sec: 48333.4, 60 sec: 43008.0, 300 sec: 44681.1). Total num frames: 1016889344. Throughput: 0: 11207.9. Samples: 4195356. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:43,977][24592] Avg episode reward: [(0, '4.310')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:45,650][626795] Updated weights for policy 0, policy_version 124142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:47,332][626795] Updated weights for policy 0, policy_version 124152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:48,975][24592] Fps is (10 sec: 48334.5, 60 sec: 45166.1, 300 sec: 44653.4). Total num frames: 1017126912. Throughput: 0: 11208.9. Samples: 4267860. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:48,976][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:48,991][626795] Updated weights for policy 0, policy_version 124162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:50,738][626795] Updated weights for policy 0, policy_version 124172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:52,568][626795] Updated weights for policy 0, policy_version 124182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:53,975][24592] Fps is (10 sec: 47512.9, 60 sec: 45465.6, 300 sec: 44597.8). Total num frames: 1017364480. Throughput: 0: 11168.0. Samples: 4339056. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:27:53,976][24592] Avg episode reward: [(0, '4.342')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:54,194][626795] Updated weights for policy 0, policy_version 124192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:56,009][626795] Updated weights for policy 0, policy_version 124202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:27:57,935][626795] Updated weights for policy 0, policy_version 124212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:01,633][24592] Fps is (10 sec: 35595.5, 60 sec: 43275.7, 300 sec: 44062.0). Total num frames: 1017577472. Throughput: 0: 10502.6. Samples: 4373418. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:01,634][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:02,562][626795] Updated weights for policy 0, policy_version 124222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:03,976][24592] Fps is (10 sec: 31948.8, 60 sec: 42876.2, 300 sec: 43986.9). Total num frames: 1017683968. Throughput: 0: 10280.1. Samples: 4407702. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:03,978][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:04,436][626795] Updated weights for policy 0, policy_version 124232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:06,301][626795] Updated weights for policy 0, policy_version 124242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:08,030][626795] Updated weights for policy 0, policy_version 124252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:08,975][24592] Fps is (10 sec: 45744.5, 60 sec: 42598.3, 300 sec: 43903.6). Total num frames: 1017913344. Throughput: 0: 10909.9. Samples: 4476840. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:08,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:09,755][626795] Updated weights for policy 0, policy_version 124262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:11,306][626795] Updated weights for policy 0, policy_version 124272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:13,064][626795] Updated weights for policy 0, policy_version 124282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:13,976][24592] Fps is (10 sec: 47511.6, 60 sec: 42735.0, 300 sec: 43903.7). Total num frames: 1018159104. Throughput: 0: 10987.1. Samples: 4513008. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:13,979][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:14,752][626795] Updated weights for policy 0, policy_version 124292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:16,470][626795] Updated weights for policy 0, policy_version 124302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:18,169][626795] Updated weights for policy 0, policy_version 124312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:18,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42734.9, 300 sec: 44347.9). Total num frames: 1018396672. Throughput: 0: 11102.3. Samples: 4585254. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:18,976][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:19,859][626795] Updated weights for policy 0, policy_version 124322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:21,518][626795] Updated weights for policy 0, policy_version 124332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:23,224][626795] Updated weights for policy 0, policy_version 124342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:23,975][24592] Fps is (10 sec: 48334.7, 60 sec: 45009.3, 300 sec: 44403.4). Total num frames: 1018642432. Throughput: 0: 11101.0. Samples: 4658016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:23,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:24,950][626795] Updated weights for policy 0, policy_version 124352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:26,688][626795] Updated weights for policy 0, policy_version 124362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:28,500][626795] Updated weights for policy 0, policy_version 124372 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:28,975][24592] Fps is (10 sec: 47513.8, 60 sec: 45192.6, 300 sec: 44347.9). Total num frames: 1018871808. Throughput: 0: 11066.3. Samples: 4693338. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:28,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:30,236][626795] Updated weights for policy 0, policy_version 124382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:31,949][626795] Updated weights for policy 0, policy_version 124392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:33,871][626795] Updated weights for policy 0, policy_version 124402 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:36,632][24592] Fps is (10 sec: 36246.8, 60 sec: 43015.0, 300 sec: 43897.1). Total num frames: 1019101184. Throughput: 0: 10392.4. Samples: 4763124. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:36,635][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:38,465][626795] Updated weights for policy 0, policy_version 124412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:38,975][24592] Fps is (10 sec: 32767.5, 60 sec: 42598.5, 300 sec: 43736.9). Total num frames: 1019199488. Throughput: 0: 10208.3. Samples: 4798428. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:38,978][24592] Avg episode reward: [(0, '4.254')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:40,314][626795] Updated weights for policy 0, policy_version 124422 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:42,001][626795] Updated weights for policy 0, policy_version 124432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:43,888][626795] Updated weights for policy 0, policy_version 124442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:43,975][24592] Fps is (10 sec: 44621.3, 60 sec: 42325.2, 300 sec: 43681.4). Total num frames: 1019428864. Throughput: 0: 10855.7. Samples: 4833072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:43,976][24592] Avg episode reward: [(0, '4.292')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:45,418][626795] Updated weights for policy 0, policy_version 124452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:47,253][626795] Updated weights for policy 0, policy_version 124462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:48,912][626795] Updated weights for policy 0, policy_version 124472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:48,975][24592] Fps is (10 sec: 47514.4, 60 sec: 42461.9, 300 sec: 43709.2). Total num frames: 1019674624. Throughput: 0: 11026.7. Samples: 4903902. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:48,976][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:50,580][626795] Updated weights for policy 0, policy_version 124482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:52,299][626795] Updated weights for policy 0, policy_version 124492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:53,975][24592] Fps is (10 sec: 48332.3, 60 sec: 42461.8, 300 sec: 44125.7). Total num frames: 1019912192. Throughput: 0: 11108.4. Samples: 4976718. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:53,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:54,107][626795] Updated weights for policy 0, policy_version 124502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:55,613][626795] Updated weights for policy 0, policy_version 124512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:57,373][626795] Updated weights for policy 0, policy_version 124522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:58,975][24592] Fps is (10 sec: 48332.8, 60 sec: 45001.4, 300 sec: 44153.5). Total num frames: 1020157952. Throughput: 0: 11106.5. Samples: 5012796. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:28:58,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:28:59,143][626795] Updated weights for policy 0, policy_version 124532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:00,840][626795] Updated weights for policy 0, policy_version 124542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:02,554][626795] Updated weights for policy 0, policy_version 124552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:03,976][24592] Fps is (10 sec: 47512.0, 60 sec: 45055.7, 300 sec: 44070.1). Total num frames: 1020387328. Throughput: 0: 11087.1. Samples: 5084178. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:03,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000124559_1020387328.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000123282_1009926144.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:04,288][626795] Updated weights for policy 0, policy_version 124562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:06,191][626795] Updated weights for policy 0, policy_version 124572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:07,848][626795] Updated weights for policy 0, policy_version 124582 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:11,606][24592] Fps is (10 sec: 35671.4, 60 sec: 43032.7, 300 sec: 43570.6). Total num frames: 1020608512. Throughput: 0: 10385.9. Samples: 5152704. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:11,609][24592] Avg episode reward: [(0, '4.223')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:12,589][626795] Updated weights for policy 0, policy_version 124592 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:13,975][24592] Fps is (10 sec: 32769.5, 60 sec: 42598.8, 300 sec: 43487.0). Total num frames: 1020715008. Throughput: 0: 10280.8. Samples: 5155974. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:13,976][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:14,510][626795] Updated weights for policy 0, policy_version 124602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:16,098][626795] Updated weights for policy 0, policy_version 124612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:17,869][626795] Updated weights for policy 0, policy_version 124622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:18,975][24592] Fps is (10 sec: 46689.8, 60 sec: 42598.4, 300 sec: 43459.3). Total num frames: 1020952576. Throughput: 0: 10880.4. Samples: 5223840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:18,977][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:19,604][626795] Updated weights for policy 0, policy_version 124632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:21,264][626795] Updated weights for policy 0, policy_version 124642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:23,037][626795] Updated weights for policy 0, policy_version 124652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42598.5, 300 sec: 43907.3). Total num frames: 1021198336. Throughput: 0: 11068.8. Samples: 5296524. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:23,977][24592] Avg episode reward: [(0, '4.246')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:24,588][626795] Updated weights for policy 0, policy_version 124662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:26,325][626795] Updated weights for policy 0, policy_version 124672 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:28,069][626795] Updated weights for policy 0, policy_version 124682 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:28,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42734.9, 300 sec: 43959.1). Total num frames: 1021435904. Throughput: 0: 11110.4. Samples: 5333040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:28,978][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:29,718][626795] Updated weights for policy 0, policy_version 124692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:31,461][626795] Updated weights for policy 0, policy_version 124702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:33,077][626795] Updated weights for policy 0, policy_version 124712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:33,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44857.5, 300 sec: 43987.0). Total num frames: 1021673472. Throughput: 0: 11141.0. Samples: 5405250. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:33,977][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:34,818][626795] Updated weights for policy 0, policy_version 124722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:36,690][626795] Updated weights for policy 0, policy_version 124732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:38,361][626795] Updated weights for policy 0, policy_version 124742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:38,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45192.6, 300 sec: 43959.1). Total num frames: 1021911040. Throughput: 0: 11092.0. Samples: 5475858. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:38,978][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:40,014][626795] Updated weights for policy 0, policy_version 124752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:41,837][626795] Updated weights for policy 0, policy_version 124762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:43,650][626795] Updated weights for policy 0, policy_version 124772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:46,587][24592] Fps is (10 sec: 36375.9, 60 sec: 43176.8, 300 sec: 43463.3). Total num frames: 1022132224. Throughput: 0: 10476.4. Samples: 5511594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:46,588][24592] Avg episode reward: [(0, '4.340')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:48,287][626795] Updated weights for policy 0, policy_version 124782 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:48,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42734.9, 300 sec: 43376.0). Total num frames: 1022238720. Throughput: 0: 10279.2. Samples: 5546736. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:48,976][24592] Avg episode reward: [(0, '4.209')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:50,233][626795] Updated weights for policy 0, policy_version 124792 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:51,971][626795] Updated weights for policy 0, policy_version 124802 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:53,609][626795] Updated weights for policy 0, policy_version 124812 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:53,975][24592] Fps is (10 sec: 45458.8, 60 sec: 42598.5, 300 sec: 43320.4). Total num frames: 1022468096. Throughput: 0: 10920.6. Samples: 5615400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:53,976][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:55,389][626795] Updated weights for policy 0, policy_version 124822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:57,146][626795] Updated weights for policy 0, policy_version 124832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:29:58,798][626795] Updated weights for policy 0, policy_version 124842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:58,976][24592] Fps is (10 sec: 46690.1, 60 sec: 42461.2, 300 sec: 43743.2). Total num frames: 1022705664. Throughput: 0: 10999.0. Samples: 5650938. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:29:58,978][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:00,476][626795] Updated weights for policy 0, policy_version 124852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:02,243][626795] Updated weights for policy 0, policy_version 124862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:03,890][626795] Updated weights for policy 0, policy_version 124872 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:03,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42735.3, 300 sec: 43820.3). Total num frames: 1022951424. Throughput: 0: 11093.5. Samples: 5723046. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:03,976][24592] Avg episode reward: [(0, '4.309')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:05,618][626795] Updated weights for policy 0, policy_version 124882 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:07,254][626795] Updated weights for policy 0, policy_version 124892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:08,976][24592] Fps is (10 sec: 48334.9, 60 sec: 44979.9, 300 sec: 43820.2). Total num frames: 1023188992. Throughput: 0: 11088.3. Samples: 5795502. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:08,977][24592] Avg episode reward: [(0, '4.242')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:09,089][626795] Updated weights for policy 0, policy_version 124902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:10,839][626795] Updated weights for policy 0, policy_version 124912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:12,547][626795] Updated weights for policy 0, policy_version 124922 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:13,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45192.5, 300 sec: 43820.3). Total num frames: 1023426560. Throughput: 0: 11048.1. Samples: 5830206. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:13,976][24592] Avg episode reward: [(0, '4.221')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:14,349][626795] Updated weights for policy 0, policy_version 124932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:16,044][626795] Updated weights for policy 0, policy_version 124942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:17,922][626795] Updated weights for policy 0, policy_version 124952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:21,490][24592] Fps is (10 sec: 36004.0, 60 sec: 42981.5, 300 sec: 43339.8). Total num frames: 1023639552. Throughput: 0: 10408.4. Samples: 5899800. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:21,491][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:22,423][626795] Updated weights for policy 0, policy_version 124962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:23,976][24592] Fps is (10 sec: 32767.6, 60 sec: 42598.3, 300 sec: 43237.1). Total num frames: 1023754240. Throughput: 0: 10224.4. Samples: 5935956. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:23,979][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:24,306][626795] Updated weights for policy 0, policy_version 124972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:25,934][626795] Updated weights for policy 0, policy_version 124982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:27,840][626795] Updated weights for policy 0, policy_version 124992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:28,975][24592] Fps is (10 sec: 47059.4, 60 sec: 42598.4, 300 sec: 43209.3). Total num frames: 1023991808. Throughput: 0: 10843.8. Samples: 5971248. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:28,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:29,466][626795] Updated weights for policy 0, policy_version 125002 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:31,193][626795] Updated weights for policy 0, policy_version 125012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:32,895][626795] Updated weights for policy 0, policy_version 125022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:33,975][24592] Fps is (10 sec: 47514.2, 60 sec: 42598.4, 300 sec: 43662.3). Total num frames: 1024229376. Throughput: 0: 11022.5. Samples: 6042750. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:33,978][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:34,644][626795] Updated weights for policy 0, policy_version 125032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:36,275][626795] Updated weights for policy 0, policy_version 125042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:38,004][626795] Updated weights for policy 0, policy_version 125052 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:38,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42598.4, 300 sec: 43709.2). Total num frames: 1024466944. Throughput: 0: 11101.5. Samples: 6114966. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:38,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:39,620][626795] Updated weights for policy 0, policy_version 125062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:41,372][626795] Updated weights for policy 0, policy_version 125072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:43,092][626795] Updated weights for policy 0, policy_version 125082 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:43,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44822.3, 300 sec: 43737.0). Total num frames: 1024704512. Throughput: 0: 11119.3. Samples: 6151296. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:43,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:44,830][626795] Updated weights for policy 0, policy_version 125092 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:46,600][626795] Updated weights for policy 0, policy_version 125102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:48,281][626795] Updated weights for policy 0, policy_version 125112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:48,976][24592] Fps is (10 sec: 47510.3, 60 sec: 45055.5, 300 sec: 43709.1). Total num frames: 1024942080. Throughput: 0: 11086.0. Samples: 6221922. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:48,978][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:50,206][626795] Updated weights for policy 0, policy_version 125122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:52,214][626795] Updated weights for policy 0, policy_version 125132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:56,485][24592] Fps is (10 sec: 34707.9, 60 sec: 42723.0, 300 sec: 43175.3). Total num frames: 1025138688. Throughput: 0: 10303.8. Samples: 6285024. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:56,487][24592] Avg episode reward: [(0, '4.364')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:57,090][626795] Updated weights for policy 0, policy_version 125142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:30:58,866][626795] Updated weights for policy 0, policy_version 125152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:58,976][24592] Fps is (10 sec: 30309.8, 60 sec: 42325.4, 300 sec: 43098.6). Total num frames: 1025245184. Throughput: 0: 10169.3. Samples: 6287832. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:30:58,978][24592] Avg episode reward: [(0, '4.192')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:00,838][626795] Updated weights for policy 0, policy_version 125162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:02,615][626795] Updated weights for policy 0, policy_version 125172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:03,977][24592] Fps is (10 sec: 41550.4, 60 sec: 41641.6, 300 sec: 42959.2). Total num frames: 1025449984. Throughput: 0: 10659.2. Samples: 6352674. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:03,978][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000125177_1025449984.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:04,088][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000123916_1015119872.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:04,874][626795] Updated weights for policy 0, policy_version 125182 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:06,169][626772] Signal inference workers to stop experience collection... (100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:06,176][626772] Signal inference workers to resume experience collection... (100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:06,184][626795] InferenceWorker_p0-w0: stopping experience collection (100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:06,189][626795] InferenceWorker_p0-w0: resuming experience collection (100 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:06,670][626795] Updated weights for policy 0, policy_version 125192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:08,284][626795] Updated weights for policy 0, policy_version 125202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:08,976][24592] Fps is (10 sec: 44238.3, 60 sec: 41642.6, 300 sec: 43378.9). Total num frames: 1025687552. Throughput: 0: 10715.1. Samples: 6418140. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:08,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:10,122][626795] Updated weights for policy 0, policy_version 125212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:11,862][626795] Updated weights for policy 0, policy_version 125222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:13,558][626795] Updated weights for policy 0, policy_version 125232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:13,975][24592] Fps is (10 sec: 46701.0, 60 sec: 41506.1, 300 sec: 43431.5). Total num frames: 1025916928. Throughput: 0: 10702.9. Samples: 6452880. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:13,977][24592] Avg episode reward: [(0, '4.272')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:15,320][626795] Updated weights for policy 0, policy_version 125242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:17,104][626795] Updated weights for policy 0, policy_version 125252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:18,889][626795] Updated weights for policy 0, policy_version 125262 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:18,975][24592] Fps is (10 sec: 45877.6, 60 sec: 43606.8, 300 sec: 43403.7). Total num frames: 1026146304. Throughput: 0: 10680.8. Samples: 6523386. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:18,978][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:20,634][626795] Updated weights for policy 0, policy_version 125272 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:22,388][626795] Updated weights for policy 0, policy_version 125282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:23,976][24592] Fps is (10 sec: 45874.2, 60 sec: 43690.6, 300 sec: 43348.1). Total num frames: 1026375680. Throughput: 0: 10609.5. Samples: 6592398. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:23,977][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:24,227][626795] Updated weights for policy 0, policy_version 125292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:26,013][626795] Updated weights for policy 0, policy_version 125302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:27,747][626795] Updated weights for policy 0, policy_version 125312 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:30,609][24592] Fps is (10 sec: 39433.0, 60 sec: 42399.6, 300 sec: 43081.9). Total num frames: 1026605056. Throughput: 0: 10204.3. Samples: 6627162. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:30,611][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:31,462][626795] Updated weights for policy 0, policy_version 125322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:33,225][626795] Updated weights for policy 0, policy_version 125332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:33,975][24592] Fps is (10 sec: 37684.1, 60 sec: 42052.3, 300 sec: 43014.9). Total num frames: 1026752512. Throughput: 0: 10048.4. Samples: 6674094. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:33,976][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:34,971][626795] Updated weights for policy 0, policy_version 125342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:36,772][626795] Updated weights for policy 0, policy_version 125352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:38,483][626795] Updated weights for policy 0, policy_version 125362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:38,975][24592] Fps is (10 sec: 45041.8, 60 sec: 41915.7, 300 sec: 42959.4). Total num frames: 1026981888. Throughput: 0: 10807.9. Samples: 6744258. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:38,977][24592] Avg episode reward: [(0, '4.389')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:40,103][626795] Updated weights for policy 0, policy_version 125372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:41,834][626795] Updated weights for policy 0, policy_version 125382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:43,523][626795] Updated weights for policy 0, policy_version 125392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:43,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42052.3, 300 sec: 43410.1). Total num frames: 1027227648. Throughput: 0: 10962.4. Samples: 6781128. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:43,977][24592] Avg episode reward: [(0, '4.299')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:45,301][626795] Updated weights for policy 0, policy_version 125402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:46,942][626795] Updated weights for policy 0, policy_version 125412 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:48,659][626795] Updated weights for policy 0, policy_version 125422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:48,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42052.7, 300 sec: 43487.0). Total num frames: 1027465216. Throughput: 0: 11116.8. Samples: 6852912. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:48,978][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:50,353][626795] Updated weights for policy 0, policy_version 125432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:52,104][626795] Updated weights for policy 0, policy_version 125442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:53,856][626795] Updated weights for policy 0, policy_version 125452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44600.3, 300 sec: 43514.8). Total num frames: 1027702784. Throughput: 0: 11247.5. Samples: 6924270. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:53,978][24592] Avg episode reward: [(0, '4.224')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:55,436][626795] Updated weights for policy 0, policy_version 125462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:57,267][626795] Updated weights for policy 0, policy_version 125472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:58,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44920.1, 300 sec: 43488.0). Total num frames: 1027940352. Throughput: 0: 11270.3. Samples: 6960042. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:31:58,978][24592] Avg episode reward: [(0, '4.309')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:31:59,068][626795] Updated weights for policy 0, policy_version 125482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:00,776][626795] Updated weights for policy 0, policy_version 125492 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:02,678][626795] Updated weights for policy 0, policy_version 125502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:05,732][24592] Fps is (10 sec: 34839.8, 60 sec: 43112.2, 300 sec: 42981.2). Total num frames: 1028112384. Throughput: 0: 10803.9. Samples: 7028538. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:05,735][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:07,472][626795] Updated weights for policy 0, policy_version 125512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:08,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42871.8, 300 sec: 42931.7). Total num frames: 1028259840. Throughput: 0: 10446.1. Samples: 7062468. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:08,977][24592] Avg episode reward: [(0, '4.124')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:09,383][626795] Updated weights for policy 0, policy_version 125522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:11,312][626795] Updated weights for policy 0, policy_version 125532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:13,062][626795] Updated weights for policy 0, policy_version 125542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:13,975][24592] Fps is (10 sec: 43725.9, 60 sec: 42598.4, 300 sec: 42848.3). Total num frames: 1028472832. Throughput: 0: 10770.5. Samples: 7094238. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:13,976][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:14,831][626795] Updated weights for policy 0, policy_version 125552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:16,650][626795] Updated weights for policy 0, policy_version 125562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:18,532][626795] Updated weights for policy 0, policy_version 125572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:18,975][24592] Fps is (10 sec: 44236.4, 60 sec: 42598.3, 300 sec: 43239.4). Total num frames: 1028702208. Throughput: 0: 10836.4. Samples: 7161732. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:18,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:20,234][626795] Updated weights for policy 0, policy_version 125582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:21,974][626795] Updated weights for policy 0, policy_version 125592 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:23,692][626795] Updated weights for policy 0, policy_version 125602 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:23,976][24592] Fps is (10 sec: 46693.2, 60 sec: 42734.9, 300 sec: 43320.4). Total num frames: 1028939776. Throughput: 0: 10850.5. Samples: 7232532. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:23,976][24592] Avg episode reward: [(0, '4.251')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:25,538][626795] Updated weights for policy 0, policy_version 125612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:27,218][626795] Updated weights for policy 0, policy_version 125622 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:28,975][24592] Fps is (10 sec: 46694.9, 60 sec: 43931.1, 300 sec: 43264.9). Total num frames: 1029169152. Throughput: 0: 10813.5. Samples: 7267734. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:28,977][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:29,040][626795] Updated weights for policy 0, policy_version 125632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:30,703][626795] Updated weights for policy 0, policy_version 125642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:32,381][626795] Updated weights for policy 0, policy_version 125652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:33,976][24592] Fps is (10 sec: 46694.2, 60 sec: 44236.6, 300 sec: 43264.9). Total num frames: 1029406720. Throughput: 0: 10787.3. Samples: 7338342. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:33,977][24592] Avg episode reward: [(0, '4.225')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:34,223][626795] Updated weights for policy 0, policy_version 125662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:35,971][626795] Updated weights for policy 0, policy_version 125672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:37,790][626795] Updated weights for policy 0, policy_version 125682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:40,116][24592] Fps is (10 sec: 38237.4, 60 sec: 42741.7, 300 sec: 42904.6). Total num frames: 1029595136. Throughput: 0: 9722.9. Samples: 7372890. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:40,117][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:41,691][626795] Updated weights for policy 0, policy_version 125692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:43,355][626795] Updated weights for policy 0, policy_version 125702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:43,976][24592] Fps is (10 sec: 36864.1, 60 sec: 42461.6, 300 sec: 42876.0). Total num frames: 1029775360. Throughput: 0: 10179.4. Samples: 7418118. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:43,978][24592] Avg episode reward: [(0, '4.354')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:45,212][626795] Updated weights for policy 0, policy_version 125712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:46,885][626795] Updated weights for policy 0, policy_version 125722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:48,706][626795] Updated weights for policy 0, policy_version 125732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:48,976][24592] Fps is (10 sec: 46230.9, 60 sec: 42325.1, 300 sec: 42848.3). Total num frames: 1030004736. Throughput: 0: 10624.0. Samples: 7487958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:48,978][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:50,407][626795] Updated weights for policy 0, policy_version 125742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:52,184][626795] Updated weights for policy 0, policy_version 125752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:53,905][626795] Updated weights for policy 0, policy_version 125762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:53,975][24592] Fps is (10 sec: 46695.6, 60 sec: 42325.3, 300 sec: 43321.9). Total num frames: 1030242304. Throughput: 0: 11005.9. Samples: 7557732. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:53,977][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:55,752][626795] Updated weights for policy 0, policy_version 125772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:57,476][626795] Updated weights for policy 0, policy_version 125782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:58,975][24592] Fps is (10 sec: 46696.3, 60 sec: 42188.8, 300 sec: 43348.2). Total num frames: 1030471680. Throughput: 0: 11075.6. Samples: 7592640. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:32:58,978][24592] Avg episode reward: [(0, '4.254')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:32:59,265][626795] Updated weights for policy 0, policy_version 125792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:00,920][626795] Updated weights for policy 0, policy_version 125802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:02,719][626795] Updated weights for policy 0, policy_version 125812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:03,976][24592] Fps is (10 sec: 46691.4, 60 sec: 44585.9, 300 sec: 43375.9). Total num frames: 1030709248. Throughput: 0: 11134.5. Samples: 7662792. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:03,978][24592] Avg episode reward: [(0, '4.300')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000125819_1030709248.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:04,031][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000124559_1020387328.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:04,487][626795] Updated weights for policy 0, policy_version 125822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:06,236][626795] Updated weights for policy 0, policy_version 125832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:07,973][626795] Updated weights for policy 0, policy_version 125842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:08,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44646.4, 300 sec: 43320.5). Total num frames: 1030938624. Throughput: 0: 11120.5. Samples: 7732950. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:08,976][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:09,689][626795] Updated weights for policy 0, policy_version 125852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:11,594][626795] Updated weights for policy 0, policy_version 125862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:14,879][24592] Fps is (10 sec: 35314.2, 60 sec: 43042.5, 300 sec: 42911.3). Total num frames: 1031094272. Throughput: 0: 10876.7. Samples: 7767012. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:14,880][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:15,907][626795] Updated weights for policy 0, policy_version 125872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:17,694][626795] Updated weights for policy 0, policy_version 125882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:18,975][24592] Fps is (10 sec: 34406.4, 60 sec: 43008.1, 300 sec: 42848.3). Total num frames: 1031282688. Throughput: 0: 10404.1. Samples: 7806522. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:18,976][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:19,441][626795] Updated weights for policy 0, policy_version 125892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:21,210][626795] Updated weights for policy 0, policy_version 125902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:22,943][626795] Updated weights for policy 0, policy_version 125912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:23,975][24592] Fps is (10 sec: 45928.6, 60 sec: 42871.6, 300 sec: 42848.3). Total num frames: 1031512064. Throughput: 0: 11481.4. Samples: 7876458. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:23,978][24592] Avg episode reward: [(0, '4.377')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:24,768][626795] Updated weights for policy 0, policy_version 125922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:26,478][626795] Updated weights for policy 0, policy_version 125932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:28,243][626795] Updated weights for policy 0, policy_version 125942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:28,976][24592] Fps is (10 sec: 45873.6, 60 sec: 42871.2, 300 sec: 43237.6). Total num frames: 1031741440. Throughput: 0: 10959.1. Samples: 7911276. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:28,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:29,952][626795] Updated weights for policy 0, policy_version 125952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:31,764][626795] Updated weights for policy 0, policy_version 125962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:33,339][626795] Updated weights for policy 0, policy_version 125972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:33,975][24592] Fps is (10 sec: 47513.8, 60 sec: 43008.3, 300 sec: 43348.2). Total num frames: 1031987200. Throughput: 0: 10984.1. Samples: 7982238. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:33,977][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:35,163][626795] Updated weights for policy 0, policy_version 125982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:36,894][626795] Updated weights for policy 0, policy_version 125992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:38,627][626795] Updated weights for policy 0, policy_version 126002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:38,976][24592] Fps is (10 sec: 48333.7, 60 sec: 44676.3, 300 sec: 43375.9). Total num frames: 1032224768. Throughput: 0: 11014.4. Samples: 8053380. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:38,976][24592] Avg episode reward: [(0, '4.118')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:40,327][626795] Updated weights for policy 0, policy_version 126012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:42,144][626795] Updated weights for policy 0, policy_version 126022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:43,806][626795] Updated weights for policy 0, policy_version 126032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:43,975][24592] Fps is (10 sec: 46694.0, 60 sec: 44646.6, 300 sec: 43320.4). Total num frames: 1032454144. Throughput: 0: 11013.5. Samples: 8088246. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:43,976][24592] Avg episode reward: [(0, '4.250')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:45,746][626795] Updated weights for policy 0, policy_version 126042 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:49,740][24592] Fps is (10 sec: 34245.4, 60 sec: 42601.8, 300 sec: 42876.0). Total num frames: 1032593408. Throughput: 0: 10063.8. Samples: 8123352. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:49,741][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:50,169][626795] Updated weights for policy 0, policy_version 126052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:51,853][626795] Updated weights for policy 0, policy_version 126062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:53,622][626795] Updated weights for policy 0, policy_version 126072 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:53,976][24592] Fps is (10 sec: 33586.1, 60 sec: 42461.6, 300 sec: 42820.5). Total num frames: 1032790016. Throughput: 0: 10287.8. Samples: 8195904. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:53,981][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:55,511][626795] Updated weights for policy 0, policy_version 126082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:57,126][626795] Updated weights for policy 0, policy_version 126092 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:33:58,908][626795] Updated weights for policy 0, policy_version 126102 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:58,976][24592] Fps is (10 sec: 47009.7, 60 sec: 42597.9, 300 sec: 42848.3). Total num frames: 1033027584. Throughput: 0: 10517.8. Samples: 8230818. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:33:58,977][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:00,757][626795] Updated weights for policy 0, policy_version 126112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:02,442][626795] Updated weights for policy 0, policy_version 126122 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:03,975][24592] Fps is (10 sec: 47514.9, 60 sec: 42598.8, 300 sec: 43289.9). Total num frames: 1033265152. Throughput: 0: 10994.8. Samples: 8301288. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:03,977][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:04,140][626795] Updated weights for policy 0, policy_version 126132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:05,871][626795] Updated weights for policy 0, policy_version 126142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:07,563][626795] Updated weights for policy 0, policy_version 126152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:08,975][24592] Fps is (10 sec: 47516.8, 60 sec: 42734.9, 300 sec: 43348.2). Total num frames: 1033502720. Throughput: 0: 11046.4. Samples: 8373546. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:08,977][24592] Avg episode reward: [(0, '4.242')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:09,228][626795] Updated weights for policy 0, policy_version 126162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:10,956][626795] Updated weights for policy 0, policy_version 126172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:12,728][626795] Updated weights for policy 0, policy_version 126182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:13,975][24592] Fps is (10 sec: 47514.0, 60 sec: 44774.5, 300 sec: 43348.2). Total num frames: 1033740288. Throughput: 0: 11062.9. Samples: 8409102. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:13,976][24592] Avg episode reward: [(0, '4.274')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:14,556][626795] Updated weights for policy 0, policy_version 126192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:16,211][626795] Updated weights for policy 0, policy_version 126202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:18,005][626795] Updated weights for policy 0, policy_version 126212 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:18,975][24592] Fps is (10 sec: 46693.8, 60 sec: 44782.8, 300 sec: 43292.6). Total num frames: 1033969664. Throughput: 0: 11038.5. Samples: 8478972. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:18,978][24592] Avg episode reward: [(0, '4.295')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:19,874][626795] Updated weights for policy 0, policy_version 126222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:21,727][626795] Updated weights for policy 0, policy_version 126232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:24,627][24592] Fps is (10 sec: 33840.5, 60 sec: 42681.2, 300 sec: 42837.0). Total num frames: 1034100736. Throughput: 0: 10053.7. Samples: 8512344. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:24,628][24592] Avg episode reward: [(0, '4.312')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:26,127][626795] Updated weights for policy 0, policy_version 126242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:27,905][626795] Updated weights for policy 0, policy_version 126252 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:28,975][24592] Fps is (10 sec: 33587.7, 60 sec: 42735.2, 300 sec: 42820.6). Total num frames: 1034305536. Throughput: 0: 10271.3. Samples: 8550456. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:28,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:29,641][626795] Updated weights for policy 0, policy_version 126262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:31,401][626795] Updated weights for policy 0, policy_version 126272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:33,029][626795] Updated weights for policy 0, policy_version 126282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:33,975][24592] Fps is (10 sec: 47319.0, 60 sec: 42598.3, 300 sec: 42820.6). Total num frames: 1034543104. Throughput: 0: 11256.9. Samples: 8621304. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:33,977][24592] Avg episode reward: [(0, '4.321')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:34,760][626795] Updated weights for policy 0, policy_version 126292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:36,609][626795] Updated weights for policy 0, policy_version 126302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:38,150][626795] Updated weights for policy 0, policy_version 126312 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:38,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42598.4, 300 sec: 43259.0). Total num frames: 1034780672. Throughput: 0: 11041.4. Samples: 8692764. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:38,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:39,923][626795] Updated weights for policy 0, policy_version 126322 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:41,725][626795] Updated weights for policy 0, policy_version 126332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:43,221][626795] Updated weights for policy 0, policy_version 126342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:43,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42871.5, 300 sec: 43348.2). Total num frames: 1035026432. Throughput: 0: 11072.7. Samples: 8729082. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:43,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:45,025][626795] Updated weights for policy 0, policy_version 126352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:46,855][626795] Updated weights for policy 0, policy_version 126362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:48,581][626795] Updated weights for policy 0, policy_version 126372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:48,975][24592] Fps is (10 sec: 47514.1, 60 sec: 44946.3, 300 sec: 43348.2). Total num frames: 1035255808. Throughput: 0: 11073.2. Samples: 8799582. Policy #0 lag: (min: 1.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:48,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:50,341][626795] Updated weights for policy 0, policy_version 126382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:52,083][626795] Updated weights for policy 0, policy_version 126392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:53,964][626795] Updated weights for policy 0, policy_version 126402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:53,975][24592] Fps is (10 sec: 45874.8, 60 sec: 44919.7, 300 sec: 43320.5). Total num frames: 1035485184. Throughput: 0: 11009.6. Samples: 8868978. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:53,977][24592] Avg episode reward: [(0, '4.222')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:34:55,908][626795] Updated weights for policy 0, policy_version 126412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:59,610][24592] Fps is (10 sec: 33126.0, 60 sec: 42558.9, 300 sec: 42811.9). Total num frames: 1035608064. Throughput: 0: 10775.0. Samples: 8900808. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:34:59,611][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:00,434][626795] Updated weights for policy 0, policy_version 126422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:02,289][626795] Updated weights for policy 0, policy_version 126432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:03,975][24592] Fps is (10 sec: 31949.1, 60 sec: 42325.4, 300 sec: 42765.1). Total num frames: 1035804672. Throughput: 0: 10207.0. Samples: 8938284. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:03,976][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:03,982][626795] Updated weights for policy 0, policy_version 126442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000126442_1035812864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000125177_1025449984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:05,690][626795] Updated weights for policy 0, policy_version 126452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:07,464][626795] Updated weights for policy 0, policy_version 126462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:08,975][24592] Fps is (10 sec: 47230.3, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1036050432. Throughput: 0: 11209.5. Samples: 9009468. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:08,976][24592] Avg episode reward: [(0, '4.323')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:09,107][626795] Updated weights for policy 0, policy_version 126472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:10,876][626795] Updated weights for policy 0, policy_version 126482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:12,565][626795] Updated weights for policy 0, policy_version 126492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:13,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42461.9, 300 sec: 43244.7). Total num frames: 1036288000. Throughput: 0: 10994.9. Samples: 9045228. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:13,976][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:14,318][626795] Updated weights for policy 0, policy_version 126502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:16,041][626795] Updated weights for policy 0, policy_version 126512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:17,641][626795] Updated weights for policy 0, policy_version 126522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:18,976][24592] Fps is (10 sec: 47509.9, 60 sec: 42598.0, 300 sec: 43292.5). Total num frames: 1036525568. Throughput: 0: 11007.6. Samples: 9116652. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:18,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:19,439][626795] Updated weights for policy 0, policy_version 126532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:21,222][626795] Updated weights for policy 0, policy_version 126542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:22,958][626795] Updated weights for policy 0, policy_version 126552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:23,976][24592] Fps is (10 sec: 46692.5, 60 sec: 44722.0, 300 sec: 43264.8). Total num frames: 1036754944. Throughput: 0: 10981.3. Samples: 9186924. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:23,978][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:24,727][626795] Updated weights for policy 0, policy_version 126562 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:25,864][626772] Signal inference workers to stop experience collection... (150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:25,867][626772] Signal inference workers to resume experience collection... (150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:25,874][626795] InferenceWorker_p0-w0: stopping experience collection (150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:25,883][626795] InferenceWorker_p0-w0: resuming experience collection (150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:26,499][626795] Updated weights for policy 0, policy_version 126572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:28,365][626795] Updated weights for policy 0, policy_version 126582 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:28,975][24592] Fps is (10 sec: 45878.3, 60 sec: 44646.3, 300 sec: 43237.1). Total num frames: 1036984320. Throughput: 0: 10948.1. Samples: 9221748. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:28,978][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:30,193][626795] Updated weights for policy 0, policy_version 126592 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:34,602][24592] Fps is (10 sec: 33148.8, 60 sec: 42293.1, 300 sec: 42757.5). Total num frames: 1037107200. Throughput: 0: 9977.7. Samples: 9254832. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:34,603][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:34,871][626795] Updated weights for policy 0, policy_version 126602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:36,715][626795] Updated weights for policy 0, policy_version 126612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:38,459][626795] Updated weights for policy 0, policy_version 126622 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:38,975][24592] Fps is (10 sec: 31949.2, 60 sec: 42052.3, 300 sec: 42709.5). Total num frames: 1037303808. Throughput: 0: 10125.2. Samples: 9324612. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:38,976][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:40,262][626795] Updated weights for policy 0, policy_version 126632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:41,891][626795] Updated weights for policy 0, policy_version 126642 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:43,665][626795] Updated weights for policy 0, policy_version 126652 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:43,975][24592] Fps is (10 sec: 46321.1, 60 sec: 41915.6, 300 sec: 42709.6). Total num frames: 1037541376. Throughput: 0: 10362.7. Samples: 9360564. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:43,976][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:45,274][626795] Updated weights for policy 0, policy_version 126662 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:47,014][626795] Updated weights for policy 0, policy_version 126672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:48,772][626795] Updated weights for policy 0, policy_version 126682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:48,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42188.7, 300 sec: 43243.9). Total num frames: 1037787136. Throughput: 0: 10974.5. Samples: 9432138. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:48,976][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:50,512][626795] Updated weights for policy 0, policy_version 126692 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:52,228][626795] Updated weights for policy 0, policy_version 126702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:53,970][626795] Updated weights for policy 0, policy_version 126712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:53,975][24592] Fps is (10 sec: 48333.4, 60 sec: 42325.4, 300 sec: 43320.5). Total num frames: 1038024704. Throughput: 0: 10984.0. Samples: 9503748. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:53,976][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:55,613][626795] Updated weights for policy 0, policy_version 126722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:57,424][626795] Updated weights for policy 0, policy_version 126732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:58,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44571.1, 300 sec: 43403.9). Total num frames: 1038254080. Throughput: 0: 10964.8. Samples: 9538644. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:35:58,977][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:35:59,259][626795] Updated weights for policy 0, policy_version 126742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:00,912][626795] Updated weights for policy 0, policy_version 126752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:02,833][626795] Updated weights for policy 0, policy_version 126762 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:03,976][24592] Fps is (10 sec: 45053.7, 60 sec: 44509.5, 300 sec: 43348.2). Total num frames: 1038475264. Throughput: 0: 10907.5. Samples: 9607488. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:03,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:04,774][626795] Updated weights for policy 0, policy_version 126772 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:06,609][626795] Updated weights for policy 0, policy_version 126782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:09,571][24592] Fps is (10 sec: 33244.4, 60 sec: 42179.5, 300 sec: 42928.2). Total num frames: 1038606336. Throughput: 0: 9930.6. Samples: 9639714. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:09,572][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:11,120][626795] Updated weights for policy 0, policy_version 126792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:12,951][626795] Updated weights for policy 0, policy_version 126802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:13,975][24592] Fps is (10 sec: 33588.9, 60 sec: 42052.3, 300 sec: 42931.6). Total num frames: 1038811136. Throughput: 0: 10116.4. Samples: 9676986. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:13,976][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:14,568][626795] Updated weights for policy 0, policy_version 126812 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:16,328][626795] Updated weights for policy 0, policy_version 126822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:18,038][626795] Updated weights for policy 0, policy_version 126832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:18,975][24592] Fps is (10 sec: 47039.8, 60 sec: 42052.8, 300 sec: 42959.4). Total num frames: 1039048704. Throughput: 0: 11119.4. Samples: 9748236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:18,977][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:19,808][626795] Updated weights for policy 0, policy_version 126842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:21,424][626795] Updated weights for policy 0, policy_version 126852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:23,153][626795] Updated weights for policy 0, policy_version 126862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:23,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42189.1, 300 sec: 43226.6). Total num frames: 1039286272. Throughput: 0: 10998.7. Samples: 9819552. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:23,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:24,902][626795] Updated weights for policy 0, policy_version 126872 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:26,626][626795] Updated weights for policy 0, policy_version 126882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:28,287][626795] Updated weights for policy 0, policy_version 126892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:28,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.3, 300 sec: 43292.6). Total num frames: 1039523840. Throughput: 0: 11004.3. Samples: 9855756. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:28,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:30,076][626795] Updated weights for policy 0, policy_version 126902 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:31,882][626795] Updated weights for policy 0, policy_version 126912 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:33,541][626795] Updated weights for policy 0, policy_version 126922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:33,976][24592] Fps is (10 sec: 46693.1, 60 sec: 44565.7, 300 sec: 43292.6). Total num frames: 1039753216. Throughput: 0: 10975.9. Samples: 9926058. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:33,979][24592] Avg episode reward: [(0, '4.290')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:35,359][626795] Updated weights for policy 0, policy_version 126932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:37,121][626795] Updated weights for policy 0, policy_version 126942 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:38,976][24592] Fps is (10 sec: 45871.3, 60 sec: 44645.7, 300 sec: 43237.0). Total num frames: 1039982592. Throughput: 0: 10905.8. Samples: 9994518. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:38,978][24592] Avg episode reward: [(0, '4.250')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:39,055][626795] Updated weights for policy 0, policy_version 126952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:40,921][626795] Updated weights for policy 0, policy_version 126962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:44,541][24592] Fps is (10 sec: 33341.4, 60 sec: 42336.0, 300 sec: 42766.4). Total num frames: 1040105472. Throughput: 0: 10703.0. Samples: 10026330. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:44,542][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:45,590][626795] Updated weights for policy 0, policy_version 126972 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:47,363][626795] Updated weights for policy 0, policy_version 126982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:48,975][24592] Fps is (10 sec: 32770.9, 60 sec: 42052.3, 300 sec: 42737.2). Total num frames: 1040310272. Throughput: 0: 10140.2. Samples: 10063794. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:48,976][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:49,029][626795] Updated weights for policy 0, policy_version 126992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:50,816][626795] Updated weights for policy 0, policy_version 127002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:52,419][626795] Updated weights for policy 0, policy_version 127012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:53,975][24592] Fps is (10 sec: 46887.7, 60 sec: 42052.2, 300 sec: 42737.2). Total num frames: 1040547840. Throughput: 0: 11161.7. Samples: 10135338. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:53,978][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:54,205][626795] Updated weights for policy 0, policy_version 127022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:55,972][626795] Updated weights for policy 0, policy_version 127032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:57,687][626795] Updated weights for policy 0, policy_version 127042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:58,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42188.8, 300 sec: 43216.8). Total num frames: 1040785408. Throughput: 0: 10960.8. Samples: 10170222. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:36:58,976][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:36:59,457][626795] Updated weights for policy 0, policy_version 127052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:01,152][626795] Updated weights for policy 0, policy_version 127062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:02,890][626795] Updated weights for policy 0, policy_version 127072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:03,976][24592] Fps is (10 sec: 47509.3, 60 sec: 42461.5, 300 sec: 43264.7). Total num frames: 1041022976. Throughput: 0: 10957.8. Samples: 10241346. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:03,978][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:03,993][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000127079_1041031168.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:04,040][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000125819_1030709248.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:04,597][626795] Updated weights for policy 0, policy_version 127082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:06,389][626795] Updated weights for policy 0, policy_version 127092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:08,173][626795] Updated weights for policy 0, policy_version 127102 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:08,976][24592] Fps is (10 sec: 46693.3, 60 sec: 44542.5, 300 sec: 43320.4). Total num frames: 1041252352. Throughput: 0: 10932.6. Samples: 10311522. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:08,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:09,864][626795] Updated weights for policy 0, policy_version 127112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:11,721][626795] Updated weights for policy 0, policy_version 127122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:13,534][626795] Updated weights for policy 0, policy_version 127132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:13,976][24592] Fps is (10 sec: 45878.3, 60 sec: 44509.6, 300 sec: 43320.4). Total num frames: 1041481728. Throughput: 0: 10893.4. Samples: 10345962. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:13,977][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:15,531][626795] Updated weights for policy 0, policy_version 127142 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:19,506][24592] Fps is (10 sec: 32672.3, 60 sec: 42089.4, 300 sec: 42826.8). Total num frames: 1041596416. Throughput: 0: 9929.0. Samples: 10378134. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:19,507][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:20,086][626795] Updated weights for policy 0, policy_version 127152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:21,983][626795] Updated weights for policy 0, policy_version 127162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:23,788][626795] Updated weights for policy 0, policy_version 127172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:23,975][24592] Fps is (10 sec: 31949.7, 60 sec: 41915.7, 300 sec: 42820.6). Total num frames: 1041801216. Throughput: 0: 10091.7. Samples: 10448634. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:23,976][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:25,303][626795] Updated weights for policy 0, policy_version 127182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:27,041][626795] Updated weights for policy 0, policy_version 127192 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:28,652][626795] Updated weights for policy 0, policy_version 127202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:28,976][24592] Fps is (10 sec: 47581.3, 60 sec: 42052.2, 300 sec: 42848.3). Total num frames: 1042046976. Throughput: 0: 10329.9. Samples: 10485336. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:28,978][24592] Avg episode reward: [(0, '4.255')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:30,474][626795] Updated weights for policy 0, policy_version 127212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:32,110][626795] Updated weights for policy 0, policy_version 127222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:33,808][626795] Updated weights for policy 0, policy_version 127232 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:33,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42188.9, 300 sec: 43181.9). Total num frames: 1042284544. Throughput: 0: 10980.4. Samples: 10557912. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:33,976][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:35,448][626795] Updated weights for policy 0, policy_version 127242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:37,131][626795] Updated weights for policy 0, policy_version 127252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:38,805][626795] Updated weights for policy 0, policy_version 127262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:38,976][24592] Fps is (10 sec: 48332.1, 60 sec: 42462.3, 300 sec: 43237.1). Total num frames: 1042530304. Throughput: 0: 11013.8. Samples: 10630962. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:38,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:40,573][626795] Updated weights for policy 0, policy_version 127272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:42,356][626795] Updated weights for policy 0, policy_version 127282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:43,975][24592] Fps is (10 sec: 48333.1, 60 sec: 44795.5, 300 sec: 43264.9). Total num frames: 1042767872. Throughput: 0: 11030.4. Samples: 10666590. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:43,976][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:44,008][626795] Updated weights for policy 0, policy_version 127292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:45,747][626795] Updated weights for policy 0, policy_version 127302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:47,620][626795] Updated weights for policy 0, policy_version 127312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:48,976][24592] Fps is (10 sec: 46695.2, 60 sec: 44782.8, 300 sec: 43237.1). Total num frames: 1042997248. Throughput: 0: 11001.5. Samples: 10736406. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:48,978][24592] Avg episode reward: [(0, '4.296')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:49,476][626795] Updated weights for policy 0, policy_version 127322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:51,439][626795] Updated weights for policy 0, policy_version 127332 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:54,452][24592] Fps is (10 sec: 33622.4, 60 sec: 42533.5, 300 sec: 42806.9). Total num frames: 1043120128. Throughput: 0: 10073.7. Samples: 10769640. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:54,454][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:55,723][626795] Updated weights for policy 0, policy_version 127342 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:57,485][626795] Updated weights for policy 0, policy_version 127352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:58,975][24592] Fps is (10 sec: 34407.2, 60 sec: 42598.4, 300 sec: 42820.7). Total num frames: 1043341312. Throughput: 0: 10260.9. Samples: 10807698. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:37:58,976][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:37:59,162][626795] Updated weights for policy 0, policy_version 127362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:00,805][626795] Updated weights for policy 0, policy_version 127372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:02,483][626795] Updated weights for policy 0, policy_version 127382 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:03,975][24592] Fps is (10 sec: 48172.3, 60 sec: 42599.1, 300 sec: 42848.3). Total num frames: 1043578880. Throughput: 0: 11295.4. Samples: 10880430. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:03,976][24592] Avg episode reward: [(0, '4.264')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:04,202][626795] Updated weights for policy 0, policy_version 127392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:05,878][626795] Updated weights for policy 0, policy_version 127402 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:07,602][626795] Updated weights for policy 0, policy_version 127412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:08,977][24592] Fps is (10 sec: 48328.4, 60 sec: 42871.0, 300 sec: 43286.2). Total num frames: 1043824640. Throughput: 0: 11225.5. Samples: 10953792. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:08,978][24592] Avg episode reward: [(0, '4.305')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:09,312][626795] Updated weights for policy 0, policy_version 127422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:10,990][626795] Updated weights for policy 0, policy_version 127432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:12,698][626795] Updated weights for policy 0, policy_version 127442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:13,975][24592] Fps is (10 sec: 48332.6, 60 sec: 43008.2, 300 sec: 43320.4). Total num frames: 1044062208. Throughput: 0: 11197.5. Samples: 10989222. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:13,977][24592] Avg episode reward: [(0, '4.330')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:14,389][626795] Updated weights for policy 0, policy_version 127452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:16,149][626795] Updated weights for policy 0, policy_version 127462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:17,846][626795] Updated weights for policy 0, policy_version 127472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:18,975][24592] Fps is (10 sec: 47517.7, 60 sec: 45458.3, 300 sec: 43348.2). Total num frames: 1044299776. Throughput: 0: 11191.0. Samples: 11061504. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:18,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:19,516][626795] Updated weights for policy 0, policy_version 127482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:21,297][626795] Updated weights for policy 0, policy_version 127492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:23,134][626795] Updated weights for policy 0, policy_version 127502 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:23,975][24592] Fps is (10 sec: 46694.2, 60 sec: 45465.6, 300 sec: 43348.2). Total num frames: 1044529152. Throughput: 0: 11104.3. Samples: 11130654. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:23,976][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:25,022][626795] Updated weights for policy 0, policy_version 127512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:29,381][24592] Fps is (10 sec: 33063.6, 60 sec: 42990.2, 300 sec: 42844.9). Total num frames: 1044643840. Throughput: 0: 10930.2. Samples: 11162886. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:29,382][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:29,695][626795] Updated weights for policy 0, policy_version 127522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:31,273][626795] Updated weights for policy 0, policy_version 127532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:32,941][626795] Updated weights for policy 0, policy_version 127542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:33,976][24592] Fps is (10 sec: 33585.5, 60 sec: 43007.7, 300 sec: 42848.3). Total num frames: 1044865024. Throughput: 0: 10368.6. Samples: 11202996. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:33,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:34,646][626795] Updated weights for policy 0, policy_version 127552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:36,318][626795] Updated weights for policy 0, policy_version 127562 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:38,052][626795] Updated weights for policy 0, policy_version 127572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:38,976][24592] Fps is (10 sec: 48670.3, 60 sec: 43008.2, 300 sec: 42903.8). Total num frames: 1045110784. Throughput: 0: 11364.6. Samples: 11275632. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:38,977][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:39,743][626795] Updated weights for policy 0, policy_version 127582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:41,341][626795] Updated weights for policy 0, policy_version 127592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:43,135][626795] Updated weights for policy 0, policy_version 127602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:43,975][24592] Fps is (10 sec: 49154.5, 60 sec: 43144.5, 300 sec: 43377.3). Total num frames: 1045356544. Throughput: 0: 11216.9. Samples: 11312460. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:43,977][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:44,871][626795] Updated weights for policy 0, policy_version 127612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:46,385][626795] Updated weights for policy 0, policy_version 127622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:48,195][626795] Updated weights for policy 0, policy_version 127632 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:48,976][24592] Fps is (10 sec: 48331.8, 60 sec: 43281.0, 300 sec: 43403.7). Total num frames: 1045594112. Throughput: 0: 11205.8. Samples: 11384694. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:48,977][24592] Avg episode reward: [(0, '4.219')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:49,876][626795] Updated weights for policy 0, policy_version 127642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:51,657][626795] Updated weights for policy 0, policy_version 127652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:53,399][626795] Updated weights for policy 0, policy_version 127662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:53,975][24592] Fps is (10 sec: 47513.7, 60 sec: 45554.5, 300 sec: 43403.8). Total num frames: 1045831680. Throughput: 0: 11153.0. Samples: 11455668. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:53,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:55,030][626795] Updated weights for policy 0, policy_version 127672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:56,856][626795] Updated weights for policy 0, policy_version 127682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:38:58,793][626795] Updated weights for policy 0, policy_version 127692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:58,975][24592] Fps is (10 sec: 46695.8, 60 sec: 45329.0, 300 sec: 43376.0). Total num frames: 1046061056. Throughput: 0: 11162.5. Samples: 11491536. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:38:58,976][24592] Avg episode reward: [(0, '4.311')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:00,693][626795] Updated weights for policy 0, policy_version 127702 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:04,312][24592] Fps is (10 sec: 33286.4, 60 sec: 43039.7, 300 sec: 42910.5). Total num frames: 1046175744. Throughput: 0: 10199.1. Samples: 11523894. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:04,314][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:04,318][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000127707_1046175744.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:04,374][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000126442_1035812864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:05,120][626795] Updated weights for policy 0, policy_version 127712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:06,846][626795] Updated weights for policy 0, policy_version 127722 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:08,523][626795] Updated weights for policy 0, policy_version 127732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:08,975][24592] Fps is (10 sec: 33587.4, 60 sec: 42872.1, 300 sec: 42903.9). Total num frames: 1046396928. Throughput: 0: 10381.1. Samples: 11597802. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:08,976][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:10,181][626795] Updated weights for policy 0, policy_version 127742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:11,896][626795] Updated weights for policy 0, policy_version 127752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:13,577][626795] Updated weights for policy 0, policy_version 127762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:13,975][24592] Fps is (10 sec: 48319.8, 60 sec: 43007.9, 300 sec: 42959.4). Total num frames: 1046642688. Throughput: 0: 10559.5. Samples: 11633778. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:13,977][24592] Avg episode reward: [(0, '4.227')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:15,224][626795] Updated weights for policy 0, policy_version 127772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:16,946][626795] Updated weights for policy 0, policy_version 127782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:18,536][626795] Updated weights for policy 0, policy_version 127792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:18,975][24592] Fps is (10 sec: 49151.6, 60 sec: 43144.5, 300 sec: 43444.1). Total num frames: 1046888448. Throughput: 0: 11212.9. Samples: 11707572. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:18,977][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:20,262][626795] Updated weights for policy 0, policy_version 127802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:21,942][626795] Updated weights for policy 0, policy_version 127812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:23,629][626795] Updated weights for policy 0, policy_version 127822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:23,975][24592] Fps is (10 sec: 49151.9, 60 sec: 43417.6, 300 sec: 43487.0). Total num frames: 1047134208. Throughput: 0: 11231.2. Samples: 11781036. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:23,977][24592] Avg episode reward: [(0, '4.301')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:25,347][626795] Updated weights for policy 0, policy_version 127832 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:27,118][626795] Updated weights for policy 0, policy_version 127842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:28,843][626795] Updated weights for policy 0, policy_version 127852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:28,975][24592] Fps is (10 sec: 47514.0, 60 sec: 45638.0, 300 sec: 43459.3). Total num frames: 1047363584. Throughput: 0: 11186.4. Samples: 11815848. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:28,976][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:30,562][626795] Updated weights for policy 0, policy_version 127862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:32,332][626795] Updated weights for policy 0, policy_version 127872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:33,975][24592] Fps is (10 sec: 45875.6, 60 sec: 45466.0, 300 sec: 43431.5). Total num frames: 1047592960. Throughput: 0: 11130.6. Samples: 11885568. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:33,976][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:34,288][626795] Updated weights for policy 0, policy_version 127882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:36,119][626795] Updated weights for policy 0, policy_version 127892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:39,259][24592] Fps is (10 sec: 32660.3, 60 sec: 42941.5, 300 sec: 42918.1). Total num frames: 1047699456. Throughput: 0: 10206.4. Samples: 11917854. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:39,263][24592] Avg episode reward: [(0, '4.294')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:40,660][626795] Updated weights for policy 0, policy_version 127902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:42,196][626795] Updated weights for policy 0, policy_version 127912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:43,975][24592] Fps is (10 sec: 33587.4, 60 sec: 42871.5, 300 sec: 42959.4). Total num frames: 1047928832. Throughput: 0: 10323.2. Samples: 11956080. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:43,978][24592] Avg episode reward: [(0, '4.209')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:43,999][626795] Updated weights for policy 0, policy_version 127922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:45,669][626795] Updated weights for policy 0, policy_version 127932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:47,454][626795] Updated weights for policy 0, policy_version 127942 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:48,975][24592] Fps is (10 sec: 48901.5, 60 sec: 43008.3, 300 sec: 43015.0). Total num frames: 1048174592. Throughput: 0: 11313.9. Samples: 12029214. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:48,976][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:49,013][626795] Updated weights for policy 0, policy_version 127952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:50,641][626795] Updated weights for policy 0, policy_version 127962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:52,502][626795] Updated weights for policy 0, policy_version 127972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:53,975][24592] Fps is (10 sec: 48332.1, 60 sec: 43007.9, 300 sec: 43497.2). Total num frames: 1048412160. Throughput: 0: 11172.4. Samples: 12100560. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:53,976][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:54,298][626795] Updated weights for policy 0, policy_version 127982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:55,928][626795] Updated weights for policy 0, policy_version 127992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:57,749][626795] Updated weights for policy 0, policy_version 128002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:58,975][24592] Fps is (10 sec: 47513.8, 60 sec: 43144.6, 300 sec: 43542.6). Total num frames: 1048649728. Throughput: 0: 11184.4. Samples: 12137076. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:39:58,977][24592] Avg episode reward: [(0, '4.360')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:39:59,255][626795] Updated weights for policy 0, policy_version 128012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:01,041][626795] Updated weights for policy 0, policy_version 128022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:02,751][626795] Updated weights for policy 0, policy_version 128032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:03,976][24592] Fps is (10 sec: 48331.4, 60 sec: 45584.4, 300 sec: 43542.5). Total num frames: 1048895488. Throughput: 0: 11136.1. Samples: 12208698. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:03,977][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:04,548][626795] Updated weights for policy 0, policy_version 128042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:05,958][626772] Signal inference workers to stop experience collection... (200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:05,961][626772] Signal inference workers to resume experience collection... (200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:05,973][626795] InferenceWorker_p0-w0: stopping experience collection (200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:05,981][626795] InferenceWorker_p0-w0: resuming experience collection (200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:06,194][626795] Updated weights for policy 0, policy_version 128052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:08,151][626795] Updated weights for policy 0, policy_version 128062 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:08,976][24592] Fps is (10 sec: 46692.5, 60 sec: 45328.8, 300 sec: 43487.0). Total num frames: 1049116672. Throughput: 0: 11026.7. Samples: 12277242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:08,977][24592] Avg episode reward: [(0, '4.232')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:09,888][626795] Updated weights for policy 0, policy_version 128072 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:14,149][24592] Fps is (10 sec: 33015.2, 60 sec: 43020.1, 300 sec: 43045.3). Total num frames: 1049231360. Throughput: 0: 10213.8. Samples: 12277242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:14,152][24592] Avg episode reward: [(0, '4.263')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:14,485][626795] Updated weights for policy 0, policy_version 128082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:16,149][626795] Updated weights for policy 0, policy_version 128092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:17,979][626795] Updated weights for policy 0, policy_version 128102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:18,976][24592] Fps is (10 sec: 34404.6, 60 sec: 42870.9, 300 sec: 43070.4). Total num frames: 1049460736. Throughput: 0: 10338.3. Samples: 12350802. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:18,977][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:19,602][626795] Updated weights for policy 0, policy_version 128112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:21,309][626795] Updated weights for policy 0, policy_version 128122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:22,990][626795] Updated weights for policy 0, policy_version 128132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:23,976][24592] Fps is (10 sec: 47517.7, 60 sec: 42734.8, 300 sec: 43098.2). Total num frames: 1049698304. Throughput: 0: 11293.9. Samples: 12422880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:23,977][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:24,669][626795] Updated weights for policy 0, policy_version 128142 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:26,376][626795] Updated weights for policy 0, policy_version 128152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:28,030][626795] Updated weights for policy 0, policy_version 128162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:28,976][24592] Fps is (10 sec: 48336.8, 60 sec: 43007.9, 300 sec: 43607.5). Total num frames: 1049944064. Throughput: 0: 11172.5. Samples: 12458844. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:28,978][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:29,798][626795] Updated weights for policy 0, policy_version 128172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:31,405][626795] Updated weights for policy 0, policy_version 128182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:33,182][626795] Updated weights for policy 0, policy_version 128192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:33,975][24592] Fps is (10 sec: 49153.7, 60 sec: 43281.1, 300 sec: 43681.4). Total num frames: 1050189824. Throughput: 0: 11180.4. Samples: 12532332. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:33,977][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:34,744][626795] Updated weights for policy 0, policy_version 128202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:36,501][626795] Updated weights for policy 0, policy_version 128212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:38,240][626795] Updated weights for policy 0, policy_version 128222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:38,975][24592] Fps is (10 sec: 48333.1, 60 sec: 45681.7, 300 sec: 43681.4). Total num frames: 1050427392. Throughput: 0: 11204.2. Samples: 12604746. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:38,976][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:39,966][626795] Updated weights for policy 0, policy_version 128232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:41,687][626795] Updated weights for policy 0, policy_version 128242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:43,450][626795] Updated weights for policy 0, policy_version 128252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 45465.5, 300 sec: 43625.9). Total num frames: 1050656768. Throughput: 0: 11168.1. Samples: 12639642. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:43,976][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:45,353][626795] Updated weights for policy 0, policy_version 128262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:49,158][24592] Fps is (10 sec: 32985.0, 60 sec: 43013.7, 300 sec: 43154.9). Total num frames: 1050763264. Throughput: 0: 10288.0. Samples: 12673530. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:49,162][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:49,924][626795] Updated weights for policy 0, policy_version 128272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:51,749][626795] Updated weights for policy 0, policy_version 128282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:53,566][626795] Updated weights for policy 0, policy_version 128292 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:53,975][24592] Fps is (10 sec: 33587.4, 60 sec: 43008.1, 300 sec: 43181.6). Total num frames: 1050992640. Throughput: 0: 10388.4. Samples: 12744714. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:53,976][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:55,167][626795] Updated weights for policy 0, policy_version 128302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:56,830][626795] Updated weights for policy 0, policy_version 128312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:40:58,527][626795] Updated weights for policy 0, policy_version 128322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:58,975][24592] Fps is (10 sec: 47562.3, 60 sec: 43007.9, 300 sec: 43237.2). Total num frames: 1051230208. Throughput: 0: 11238.7. Samples: 12781032. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:40:58,976][24592] Avg episode reward: [(0, '4.326')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:00,270][626795] Updated weights for policy 0, policy_version 128332 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:01,847][626795] Updated weights for policy 0, policy_version 128342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:03,555][626795] Updated weights for policy 0, policy_version 128352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:03,975][24592] Fps is (10 sec: 48332.5, 60 sec: 43008.3, 300 sec: 43714.2). Total num frames: 1051475968. Throughput: 0: 11179.0. Samples: 12853848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:03,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000128354_1051475968.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000127079_1041031168.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:05,276][626795] Updated weights for policy 0, policy_version 128362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:07,016][626795] Updated weights for policy 0, policy_version 128372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:08,735][626795] Updated weights for policy 0, policy_version 128382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:08,975][24592] Fps is (10 sec: 48333.4, 60 sec: 43281.4, 300 sec: 43737.0). Total num frames: 1051713536. Throughput: 0: 11199.8. Samples: 12926868. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:08,976][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:10,356][626795] Updated weights for policy 0, policy_version 128392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:12,053][626795] Updated weights for policy 0, policy_version 128402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:13,817][626795] Updated weights for policy 0, policy_version 128412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:13,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45597.5, 300 sec: 43764.7). Total num frames: 1051959296. Throughput: 0: 11202.1. Samples: 12962940. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:13,977][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:15,452][626795] Updated weights for policy 0, policy_version 128422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:17,271][626795] Updated weights for policy 0, policy_version 128432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:18,975][24592] Fps is (10 sec: 47513.3, 60 sec: 45466.3, 300 sec: 43736.9). Total num frames: 1052188672. Throughput: 0: 11144.0. Samples: 13033812. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:18,977][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:19,104][626795] Updated weights for policy 0, policy_version 128442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:20,889][626795] Updated weights for policy 0, policy_version 128452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:24,174][24592] Fps is (10 sec: 33736.1, 60 sec: 43274.5, 300 sec: 43291.3). Total num frames: 1052303360. Throughput: 0: 10241.4. Samples: 13067646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:24,175][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:25,482][626795] Updated weights for policy 0, policy_version 128462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:27,123][626795] Updated weights for policy 0, policy_version 128472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:28,850][626795] Updated weights for policy 0, policy_version 128482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:28,975][24592] Fps is (10 sec: 33587.0, 60 sec: 43008.0, 300 sec: 43292.7). Total num frames: 1052524544. Throughput: 0: 10348.0. Samples: 13105302. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:28,979][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:30,502][626795] Updated weights for policy 0, policy_version 128492 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:32,125][626795] Updated weights for policy 0, policy_version 128502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:33,830][626795] Updated weights for policy 0, policy_version 128512 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:33,975][24592] Fps is (10 sec: 47641.2, 60 sec: 43008.0, 300 sec: 43348.3). Total num frames: 1052770304. Throughput: 0: 11268.0. Samples: 13178532. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:33,976][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:35,591][626795] Updated weights for policy 0, policy_version 128522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:37,226][626795] Updated weights for policy 0, policy_version 128532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:38,875][626795] Updated weights for policy 0, policy_version 128542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:38,976][24592] Fps is (10 sec: 49150.5, 60 sec: 43144.3, 300 sec: 43848.7). Total num frames: 1053016064. Throughput: 0: 11272.8. Samples: 13251996. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:38,977][24592] Avg episode reward: [(0, '4.405')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:40,539][626795] Updated weights for policy 0, policy_version 128552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:42,230][626795] Updated weights for policy 0, policy_version 128562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:43,922][626795] Updated weights for policy 0, policy_version 128572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:43,975][24592] Fps is (10 sec: 49151.8, 60 sec: 43417.6, 300 sec: 43903.6). Total num frames: 1053261824. Throughput: 0: 11289.1. Samples: 13289040. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:43,976][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:45,556][626795] Updated weights for policy 0, policy_version 128582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:47,321][626795] Updated weights for policy 0, policy_version 128592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:48,975][24592] Fps is (10 sec: 48334.6, 60 sec: 45741.3, 300 sec: 43903.6). Total num frames: 1053499392. Throughput: 0: 11272.4. Samples: 13361106. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:48,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:49,021][626795] Updated weights for policy 0, policy_version 128602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:50,829][626795] Updated weights for policy 0, policy_version 128612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:52,612][626795] Updated weights for policy 0, policy_version 128622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:53,976][24592] Fps is (10 sec: 46692.5, 60 sec: 45601.8, 300 sec: 43875.7). Total num frames: 1053728768. Throughput: 0: 11186.3. Samples: 13430256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:53,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:54,461][626795] Updated weights for policy 0, policy_version 128632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:41:56,287][626795] Updated weights for policy 0, policy_version 128642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:59,139][24592] Fps is (10 sec: 33047.6, 60 sec: 43299.8, 300 sec: 43407.6). Total num frames: 1053835264. Throughput: 0: 10347.3. Samples: 13430256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:41:59,140][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:00,748][626795] Updated weights for policy 0, policy_version 128652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:02,469][626795] Updated weights for policy 0, policy_version 128662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:03,975][24592] Fps is (10 sec: 34407.7, 60 sec: 43281.0, 300 sec: 43459.3). Total num frames: 1054072832. Throughput: 0: 10418.7. Samples: 13502652. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:03,976][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:04,176][626795] Updated weights for policy 0, policy_version 128672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:05,961][626795] Updated weights for policy 0, policy_version 128682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:07,559][626795] Updated weights for policy 0, policy_version 128692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:08,976][24592] Fps is (10 sec: 48300.7, 60 sec: 43280.8, 300 sec: 43487.0). Total num frames: 1054310400. Throughput: 0: 11313.1. Samples: 13574490. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:08,977][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:09,299][626795] Updated weights for policy 0, policy_version 128702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:10,931][626795] Updated weights for policy 0, policy_version 128712 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:12,601][626795] Updated weights for policy 0, policy_version 128722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:13,975][24592] Fps is (10 sec: 47514.0, 60 sec: 43144.6, 300 sec: 43982.7). Total num frames: 1054547968. Throughput: 0: 11243.5. Samples: 13611258. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:13,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:14,250][626795] Updated weights for policy 0, policy_version 128732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:16,020][626795] Updated weights for policy 0, policy_version 128742 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:17,705][626795] Updated weights for policy 0, policy_version 128752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:18,975][24592] Fps is (10 sec: 48334.0, 60 sec: 43417.5, 300 sec: 44042.4). Total num frames: 1054793728. Throughput: 0: 11232.4. Samples: 13683990. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:18,976][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:19,447][626795] Updated weights for policy 0, policy_version 128762 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:21,104][626795] Updated weights for policy 0, policy_version 128772 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:22,835][626795] Updated weights for policy 0, policy_version 128782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:23,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45616.7, 300 sec: 44014.7). Total num frames: 1055031296. Throughput: 0: 11193.0. Samples: 13755678. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:23,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:24,540][626795] Updated weights for policy 0, policy_version 128792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:26,421][626795] Updated weights for policy 0, policy_version 128802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:28,224][626795] Updated weights for policy 0, policy_version 128812 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:28,975][24592] Fps is (10 sec: 46694.9, 60 sec: 45602.2, 300 sec: 43986.9). Total num frames: 1055260672. Throughput: 0: 11136.5. Samples: 13790184. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:28,977][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:29,961][626795] Updated weights for policy 0, policy_version 128822 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:34,088][24592] Fps is (10 sec: 33213.5, 60 sec: 43200.0, 300 sec: 43498.3). Total num frames: 1055367168. Throughput: 0: 10254.2. Samples: 13823700. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:34,089][24592] Avg episode reward: [(0, '4.294')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:34,549][626795] Updated weights for policy 0, policy_version 128832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:36,353][626795] Updated weights for policy 0, policy_version 128842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:37,941][626795] Updated weights for policy 0, policy_version 128852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:38,976][24592] Fps is (10 sec: 33586.4, 60 sec: 43008.1, 300 sec: 43487.0). Total num frames: 1055596544. Throughput: 0: 10383.8. Samples: 13897524. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:38,977][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:39,686][626795] Updated weights for policy 0, policy_version 128862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:41,435][626795] Updated weights for policy 0, policy_version 128872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:43,139][626795] Updated weights for policy 0, policy_version 128882 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:43,978][24592] Fps is (10 sec: 47214.4, 60 sec: 42869.8, 300 sec: 43514.5). Total num frames: 1055834112. Throughput: 0: 11225.1. Samples: 13933578. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:43,979][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:44,857][626795] Updated weights for policy 0, policy_version 128892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:46,602][626795] Updated weights for policy 0, policy_version 128902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:48,308][626795] Updated weights for policy 0, policy_version 128912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:48,975][24592] Fps is (10 sec: 47514.8, 60 sec: 42871.5, 300 sec: 43974.6). Total num frames: 1056071680. Throughput: 0: 11166.2. Samples: 14005128. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:48,977][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:50,068][626795] Updated weights for policy 0, policy_version 128922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:51,726][626795] Updated weights for policy 0, policy_version 128932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:53,525][626795] Updated weights for policy 0, policy_version 128942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:53,975][24592] Fps is (10 sec: 47524.4, 60 sec: 43008.2, 300 sec: 43959.1). Total num frames: 1056309248. Throughput: 0: 11129.7. Samples: 14075322. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:53,977][24592] Avg episode reward: [(0, '4.253')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:55,295][626795] Updated weights for policy 0, policy_version 128952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:56,896][626795] Updated weights for policy 0, policy_version 128962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:42:58,678][626795] Updated weights for policy 0, policy_version 128972 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:58,977][24592] Fps is (10 sec: 47505.6, 60 sec: 45314.6, 300 sec: 43958.9). Total num frames: 1056546816. Throughput: 0: 11113.5. Samples: 14111382. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:42:58,978][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:00,404][626795] Updated weights for policy 0, policy_version 128982 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:02,193][626795] Updated weights for policy 0, policy_version 128992 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:03,804][626795] Updated weights for policy 0, policy_version 129002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:03,975][24592] Fps is (10 sec: 47514.3, 60 sec: 45192.6, 300 sec: 43931.5). Total num frames: 1056784384. Throughput: 0: 11072.3. Samples: 14182242. Policy #0 lag: (min: 1.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:03,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000129002_1056784384.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:04,075][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000127707_1046175744.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:05,657][626795] Updated weights for policy 0, policy_version 129012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:08,975][24592] Fps is (10 sec: 35231.4, 60 sec: 43144.8, 300 sec: 43514.8). Total num frames: 1056899072. Throughput: 0: 10453.3. Samples: 14226078. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:08,977][24592] Avg episode reward: [(0, '4.239')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:09,819][626795] Updated weights for policy 0, policy_version 129022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:11,648][626795] Updated weights for policy 0, policy_version 129032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:13,345][626795] Updated weights for policy 0, policy_version 129042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:13,975][24592] Fps is (10 sec: 35225.4, 60 sec: 43144.5, 300 sec: 43514.8). Total num frames: 1057136640. Throughput: 0: 10416.8. Samples: 14258940. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:13,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:15,118][626795] Updated weights for policy 0, policy_version 129052 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:16,762][626795] Updated weights for policy 0, policy_version 129062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:18,611][626795] Updated weights for policy 0, policy_version 129072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:18,976][24592] Fps is (10 sec: 47512.5, 60 sec: 43007.9, 300 sec: 43542.5). Total num frames: 1057374208. Throughput: 0: 11258.9. Samples: 14329086. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:18,977][24592] Avg episode reward: [(0, '4.362')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:20,265][626795] Updated weights for policy 0, policy_version 129082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:22,031][626795] Updated weights for policy 0, policy_version 129092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:23,787][626795] Updated weights for policy 0, policy_version 129102 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:23,976][24592] Fps is (10 sec: 46693.7, 60 sec: 42871.3, 300 sec: 43991.9). Total num frames: 1057603584. Throughput: 0: 11162.0. Samples: 14399814. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:23,977][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:25,500][626795] Updated weights for policy 0, policy_version 129112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:27,264][626795] Updated weights for policy 0, policy_version 129122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:28,964][626795] Updated weights for policy 0, policy_version 129132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:28,976][24592] Fps is (10 sec: 47512.0, 60 sec: 43144.1, 300 sec: 44014.6). Total num frames: 1057849344. Throughput: 0: 11149.3. Samples: 14435274. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:28,977][24592] Avg episode reward: [(0, '4.296')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:30,733][626795] Updated weights for policy 0, policy_version 129142 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:32,450][626795] Updated weights for policy 0, policy_version 129152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:33,976][24592] Fps is (10 sec: 48331.5, 60 sec: 45413.9, 300 sec: 43986.8). Total num frames: 1058086912. Throughput: 0: 11130.9. Samples: 14506026. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:33,979][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:34,182][626795] Updated weights for policy 0, policy_version 129162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:35,836][626795] Updated weights for policy 0, policy_version 129172 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:37,663][626795] Updated weights for policy 0, policy_version 129182 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:38,975][24592] Fps is (10 sec: 46697.0, 60 sec: 45329.2, 300 sec: 43931.3). Total num frames: 1058316288. Throughput: 0: 11150.0. Samples: 14577072. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:38,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:39,367][626795] Updated weights for policy 0, policy_version 129192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:43,664][626795] Updated weights for policy 0, policy_version 129202 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:43,976][24592] Fps is (10 sec: 35226.4, 60 sec: 43419.2, 300 sec: 43542.6). Total num frames: 1058439168. Throughput: 0: 11002.9. Samples: 14606496. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:43,976][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:45,288][626795] Updated weights for policy 0, policy_version 129212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:47,071][626795] Updated weights for policy 0, policy_version 129222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:48,780][626795] Updated weights for policy 0, policy_version 129232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:48,975][24592] Fps is (10 sec: 35225.6, 60 sec: 43281.0, 300 sec: 43514.8). Total num frames: 1058668544. Throughput: 0: 10501.6. Samples: 14654814. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:48,976][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:50,483][626795] Updated weights for policy 0, policy_version 129242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:52,263][626795] Updated weights for policy 0, policy_version 129252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:53,962][626795] Updated weights for policy 0, policy_version 129262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:53,975][24592] Fps is (10 sec: 47514.5, 60 sec: 43417.7, 300 sec: 43570.3). Total num frames: 1058914304. Throughput: 0: 11102.0. Samples: 14725668. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:53,978][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:55,709][626795] Updated weights for policy 0, policy_version 129272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:57,408][626795] Updated weights for policy 0, policy_version 129282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:58,976][24592] Fps is (10 sec: 48327.8, 60 sec: 43418.1, 300 sec: 44037.0). Total num frames: 1059151872. Throughput: 0: 11150.2. Samples: 14760708. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:43:58,977][24592] Avg episode reward: [(0, '4.369')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:43:59,340][626795] Updated weights for policy 0, policy_version 129292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:00,948][626795] Updated weights for policy 0, policy_version 129302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:02,688][626795] Updated weights for policy 0, policy_version 129312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:03,975][24592] Fps is (10 sec: 46694.6, 60 sec: 43281.0, 300 sec: 44014.6). Total num frames: 1059381248. Throughput: 0: 11138.3. Samples: 14830308. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:03,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:04,528][626795] Updated weights for policy 0, policy_version 129322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:06,299][626795] Updated weights for policy 0, policy_version 129332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:08,021][626795] Updated weights for policy 0, policy_version 129342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:08,975][24592] Fps is (10 sec: 45879.7, 60 sec: 45192.5, 300 sec: 43959.1). Total num frames: 1059610624. Throughput: 0: 11128.4. Samples: 14900592. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:08,977][24592] Avg episode reward: [(0, '4.274')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:09,707][626795] Updated weights for policy 0, policy_version 129352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:11,408][626795] Updated weights for policy 0, policy_version 129362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:13,220][626795] Updated weights for policy 0, policy_version 129372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 45192.5, 300 sec: 43931.3). Total num frames: 1059848192. Throughput: 0: 11132.1. Samples: 14936214. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:13,977][24592] Avg episode reward: [(0, '4.405')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:14,970][626795] Updated weights for policy 0, policy_version 129382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:18,976][24592] Fps is (10 sec: 35224.5, 60 sec: 43144.4, 300 sec: 43487.0). Total num frames: 1059962880. Throughput: 0: 10670.7. Samples: 14986206. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:18,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:19,148][626795] Updated weights for policy 0, policy_version 129392 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:20,850][626795] Updated weights for policy 0, policy_version 129402 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:22,505][626795] Updated weights for policy 0, policy_version 129412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:23,976][24592] Fps is (10 sec: 36044.2, 60 sec: 43417.6, 300 sec: 43542.5). Total num frames: 1060208640. Throughput: 0: 10483.6. Samples: 15048834. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:23,977][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:24,348][626795] Updated weights for policy 0, policy_version 129422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:26,056][626795] Updated weights for policy 0, policy_version 129432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:27,816][626795] Updated weights for policy 0, policy_version 129442 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:28,975][24592] Fps is (10 sec: 47515.1, 60 sec: 43144.9, 300 sec: 43542.6). Total num frames: 1060438016. Throughput: 0: 10613.6. Samples: 15084108. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:28,977][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:29,555][626795] Updated weights for policy 0, policy_version 129452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:31,371][626795] Updated weights for policy 0, policy_version 129462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:33,004][626795] Updated weights for policy 0, policy_version 129472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:33,976][24592] Fps is (10 sec: 45875.3, 60 sec: 43008.2, 300 sec: 44001.4). Total num frames: 1060667392. Throughput: 0: 11100.6. Samples: 15154344. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:33,978][24592] Avg episode reward: [(0, '4.276')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:34,820][626795] Updated weights for policy 0, policy_version 129482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:36,579][626795] Updated weights for policy 0, policy_version 129492 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:38,252][626795] Updated weights for policy 0, policy_version 129502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:38,976][24592] Fps is (10 sec: 47512.0, 60 sec: 43280.8, 300 sec: 44014.6). Total num frames: 1060913152. Throughput: 0: 11091.9. Samples: 15224808. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:38,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:39,974][626795] Updated weights for policy 0, policy_version 129512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:41,743][626795] Updated weights for policy 0, policy_version 129522 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:43,477][626795] Updated weights for policy 0, policy_version 129532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:43,976][24592] Fps is (10 sec: 47511.9, 60 sec: 45055.8, 300 sec: 43959.0). Total num frames: 1061142528. Throughput: 0: 11108.7. Samples: 15260592. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:43,977][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:45,223][626795] Updated weights for policy 0, policy_version 129542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:46,924][626795] Updated weights for policy 0, policy_version 129552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:48,787][626795] Updated weights for policy 0, policy_version 129562 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:48,975][24592] Fps is (10 sec: 46696.2, 60 sec: 45192.5, 300 sec: 43959.1). Total num frames: 1061380096. Throughput: 0: 11119.6. Samples: 15330690. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:48,977][24592] Avg episode reward: [(0, '4.253')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:50,426][626795] Updated weights for policy 0, policy_version 129572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:53,975][24592] Fps is (10 sec: 36046.6, 60 sec: 43144.6, 300 sec: 43570.3). Total num frames: 1061502976. Throughput: 0: 10498.7. Samples: 15373032. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:53,976][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:54,678][626795] Updated weights for policy 0, policy_version 129582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:56,395][626795] Updated weights for policy 0, policy_version 129592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:57,006][626772] Signal inference workers to stop experience collection... (250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:57,006][626772] Signal inference workers to resume experience collection... (250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:57,012][626795] InferenceWorker_p0-w0: stopping experience collection (250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:57,012][626795] InferenceWorker_p0-w0: resuming experience collection (250 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:58,121][626795] Updated weights for policy 0, policy_version 129602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:58,975][24592] Fps is (10 sec: 35225.3, 60 sec: 43008.7, 300 sec: 43514.8). Total num frames: 1061732352. Throughput: 0: 10479.8. Samples: 15407808. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:44:58,978][24592] Avg episode reward: [(0, '4.287')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:44:59,864][626795] Updated weights for policy 0, policy_version 129612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:01,649][626795] Updated weights for policy 0, policy_version 129622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:03,328][626795] Updated weights for policy 0, policy_version 129632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:03,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43144.5, 300 sec: 43570.4). Total num frames: 1061969920. Throughput: 0: 10942.7. Samples: 15478626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:03,976][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:04,008][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000129636_1061978112.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000128354_1051475968.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:05,196][626795] Updated weights for policy 0, policy_version 129642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:06,803][626795] Updated weights for policy 0, policy_version 129652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:08,593][626795] Updated weights for policy 0, policy_version 129662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:08,975][24592] Fps is (10 sec: 47514.0, 60 sec: 43281.1, 300 sec: 44012.8). Total num frames: 1062207488. Throughput: 0: 11121.4. Samples: 15549294. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:08,977][24592] Avg episode reward: [(0, '4.168')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:10,335][626795] Updated weights for policy 0, policy_version 129672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:12,192][626795] Updated weights for policy 0, policy_version 129682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:13,756][626795] Updated weights for policy 0, policy_version 129692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:13,975][24592] Fps is (10 sec: 47513.7, 60 sec: 43281.1, 300 sec: 44014.8). Total num frames: 1062445056. Throughput: 0: 11119.9. Samples: 15584502. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:13,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:15,530][626795] Updated weights for policy 0, policy_version 129702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:17,221][626795] Updated weights for policy 0, policy_version 129712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:18,976][24592] Fps is (10 sec: 46693.5, 60 sec: 45192.7, 300 sec: 43986.9). Total num frames: 1062674432. Throughput: 0: 11143.9. Samples: 15655818. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:18,977][24592] Avg episode reward: [(0, '4.358')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:19,042][626795] Updated weights for policy 0, policy_version 129722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:20,756][626795] Updated weights for policy 0, policy_version 129732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:22,460][626795] Updated weights for policy 0, policy_version 129742 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:23,976][24592] Fps is (10 sec: 46693.3, 60 sec: 45055.9, 300 sec: 43959.1). Total num frames: 1062912000. Throughput: 0: 11139.5. Samples: 15726084. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:23,976][24592] Avg episode reward: [(0, '4.262')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:24,209][626795] Updated weights for policy 0, policy_version 129752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:28,438][626795] Updated weights for policy 0, policy_version 129762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:28,976][24592] Fps is (10 sec: 36044.0, 60 sec: 43280.8, 300 sec: 43542.5). Total num frames: 1063034880. Throughput: 0: 10757.5. Samples: 15744678. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:28,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:30,121][626795] Updated weights for policy 0, policy_version 129772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:31,881][626795] Updated weights for policy 0, policy_version 129782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:33,583][626795] Updated weights for policy 0, policy_version 129792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:33,976][24592] Fps is (10 sec: 36044.0, 60 sec: 43417.3, 300 sec: 43542.5). Total num frames: 1063272448. Throughput: 0: 10496.3. Samples: 15803028. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:33,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:35,406][626795] Updated weights for policy 0, policy_version 129802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:37,112][626795] Updated weights for policy 0, policy_version 129812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:38,844][626795] Updated weights for policy 0, policy_version 129822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:38,976][24592] Fps is (10 sec: 46694.8, 60 sec: 43144.6, 300 sec: 43542.5). Total num frames: 1063501824. Throughput: 0: 11125.9. Samples: 15873702. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:38,978][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:40,622][626795] Updated weights for policy 0, policy_version 129832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:42,306][626795] Updated weights for policy 0, policy_version 129842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:43,976][24592] Fps is (10 sec: 46695.3, 60 sec: 43281.2, 300 sec: 44014.1). Total num frames: 1063739392. Throughput: 0: 11140.5. Samples: 15909132. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:43,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:44,057][626795] Updated weights for policy 0, policy_version 129852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:45,858][626795] Updated weights for policy 0, policy_version 129862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:47,642][626795] Updated weights for policy 0, policy_version 129872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:48,975][24592] Fps is (10 sec: 46695.7, 60 sec: 43144.5, 300 sec: 43986.9). Total num frames: 1063968768. Throughput: 0: 11113.9. Samples: 15978750. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:48,976][24592] Avg episode reward: [(0, '4.366')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:49,398][626795] Updated weights for policy 0, policy_version 129882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:51,060][626795] Updated weights for policy 0, policy_version 129892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:52,884][626795] Updated weights for policy 0, policy_version 129902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:53,976][24592] Fps is (10 sec: 46694.6, 60 sec: 45055.8, 300 sec: 43986.8). Total num frames: 1064206336. Throughput: 0: 11093.1. Samples: 16048488. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:53,978][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:54,710][626795] Updated weights for policy 0, policy_version 129912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:56,442][626795] Updated weights for policy 0, policy_version 129922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:45:58,068][626795] Updated weights for policy 0, policy_version 129932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:58,975][24592] Fps is (10 sec: 46694.6, 60 sec: 45056.1, 300 sec: 43931.3). Total num frames: 1064435712. Throughput: 0: 11099.5. Samples: 16083978. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:45:58,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:02,310][626795] Updated weights for policy 0, policy_version 129942 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:03,975][24592] Fps is (10 sec: 35226.3, 60 sec: 43144.5, 300 sec: 43542.5). Total num frames: 1064558592. Throughput: 0: 10450.8. Samples: 16126104. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:03,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:04,005][626795] Updated weights for policy 0, policy_version 129952 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:05,716][626795] Updated weights for policy 0, policy_version 129962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:07,488][626795] Updated weights for policy 0, policy_version 129972 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:08,975][24592] Fps is (10 sec: 36044.3, 60 sec: 43144.4, 300 sec: 43514.8). Total num frames: 1064796160. Throughput: 0: 10472.0. Samples: 16197324. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:08,977][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:09,172][626795] Updated weights for policy 0, policy_version 129982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:10,879][626795] Updated weights for policy 0, policy_version 129992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:12,699][626795] Updated weights for policy 0, policy_version 130002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:13,976][24592] Fps is (10 sec: 47512.6, 60 sec: 43144.4, 300 sec: 43542.5). Total num frames: 1065033728. Throughput: 0: 10841.0. Samples: 16232520. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:13,977][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:14,441][626795] Updated weights for policy 0, policy_version 130012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:16,185][626795] Updated weights for policy 0, policy_version 130022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:18,003][626795] Updated weights for policy 0, policy_version 130032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:18,976][24592] Fps is (10 sec: 47513.5, 60 sec: 43281.1, 300 sec: 43988.7). Total num frames: 1065271296. Throughput: 0: 11116.6. Samples: 16303272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:18,977][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:19,632][626795] Updated weights for policy 0, policy_version 130042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:21,319][626795] Updated weights for policy 0, policy_version 130052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:23,113][626795] Updated weights for policy 0, policy_version 130062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:23,975][24592] Fps is (10 sec: 47514.8, 60 sec: 43281.3, 300 sec: 44014.7). Total num frames: 1065508864. Throughput: 0: 11126.2. Samples: 16374378. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:23,976][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:24,869][626795] Updated weights for policy 0, policy_version 130072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:26,523][626795] Updated weights for policy 0, policy_version 130082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:28,375][626795] Updated weights for policy 0, policy_version 130092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:28,976][24592] Fps is (10 sec: 46693.9, 60 sec: 45056.1, 300 sec: 43959.1). Total num frames: 1065738240. Throughput: 0: 11118.4. Samples: 16409460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:28,977][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:30,081][626795] Updated weights for policy 0, policy_version 130102 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:31,737][626795] Updated weights for policy 0, policy_version 130112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:33,533][626795] Updated weights for policy 0, policy_version 130122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:33,975][24592] Fps is (10 sec: 46694.0, 60 sec: 45056.3, 300 sec: 43931.4). Total num frames: 1065975808. Throughput: 0: 11158.1. Samples: 16480866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:33,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:37,710][626795] Updated weights for policy 0, policy_version 130132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:38,975][24592] Fps is (10 sec: 36045.7, 60 sec: 43281.3, 300 sec: 43514.8). Total num frames: 1066098688. Throughput: 0: 10533.4. Samples: 16522488. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:38,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:39,379][626795] Updated weights for policy 0, policy_version 130142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:41,172][626795] Updated weights for policy 0, policy_version 130152 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:42,868][626795] Updated weights for policy 0, policy_version 130162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:43,975][24592] Fps is (10 sec: 35226.0, 60 sec: 43144.8, 300 sec: 43487.0). Total num frames: 1066328064. Throughput: 0: 10529.7. Samples: 16557816. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:43,977][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:44,594][626795] Updated weights for policy 0, policy_version 130172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:46,276][626795] Updated weights for policy 0, policy_version 130182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:47,998][626795] Updated weights for policy 0, policy_version 130192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:48,975][24592] Fps is (10 sec: 47513.4, 60 sec: 43417.6, 300 sec: 43542.6). Total num frames: 1066573824. Throughput: 0: 11184.8. Samples: 16629420. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:48,978][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:49,753][626795] Updated weights for policy 0, policy_version 130202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:51,522][626795] Updated weights for policy 0, policy_version 130212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:53,164][626795] Updated weights for policy 0, policy_version 130222 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:53,975][24592] Fps is (10 sec: 48332.7, 60 sec: 43417.8, 300 sec: 44011.2). Total num frames: 1066811392. Throughput: 0: 11182.7. Samples: 16700544. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:53,977][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:54,960][626795] Updated weights for policy 0, policy_version 130232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:56,601][626795] Updated weights for policy 0, policy_version 130242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:46:58,450][626795] Updated weights for policy 0, policy_version 130252 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:58,976][24592] Fps is (10 sec: 47512.4, 60 sec: 43553.9, 300 sec: 43986.8). Total num frames: 1067048960. Throughput: 0: 11186.0. Samples: 16735890. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:46:58,977][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:00,157][626795] Updated weights for policy 0, policy_version 130262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:01,857][626795] Updated weights for policy 0, policy_version 130272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:03,537][626795] Updated weights for policy 0, policy_version 130282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:03,975][24592] Fps is (10 sec: 46694.5, 60 sec: 45329.1, 300 sec: 43959.2). Total num frames: 1067278336. Throughput: 0: 11195.0. Samples: 16807044. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:03,976][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000130283_1067278336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:04,033][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000129002_1056784384.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:05,317][626795] Updated weights for policy 0, policy_version 130292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:07,105][626795] Updated weights for policy 0, policy_version 130302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:08,823][626795] Updated weights for policy 0, policy_version 130312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:08,976][24592] Fps is (10 sec: 47513.9, 60 sec: 45465.5, 300 sec: 43986.8). Total num frames: 1067524096. Throughput: 0: 11188.6. Samples: 16877868. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:08,978][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:12,950][626795] Updated weights for policy 0, policy_version 130322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:13,975][24592] Fps is (10 sec: 36044.8, 60 sec: 43417.8, 300 sec: 43542.6). Total num frames: 1067638784. Throughput: 0: 10590.9. Samples: 16886046. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:13,976][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:14,693][626795] Updated weights for policy 0, policy_version 130332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:16,500][626795] Updated weights for policy 0, policy_version 130342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:18,185][626795] Updated weights for policy 0, policy_version 130352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:18,975][24592] Fps is (10 sec: 35226.5, 60 sec: 43417.7, 300 sec: 43542.6). Total num frames: 1067876352. Throughput: 0: 10533.1. Samples: 16954854. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:18,978][24592] Avg episode reward: [(0, '4.292')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:20,029][626795] Updated weights for policy 0, policy_version 130362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:21,606][626795] Updated weights for policy 0, policy_version 130372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:23,342][626795] Updated weights for policy 0, policy_version 130382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:23,976][24592] Fps is (10 sec: 47510.6, 60 sec: 43417.1, 300 sec: 43570.2). Total num frames: 1068113920. Throughput: 0: 11198.6. Samples: 17026434. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:23,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:25,163][626795] Updated weights for policy 0, policy_version 130392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:26,839][626795] Updated weights for policy 0, policy_version 130402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:28,559][626795] Updated weights for policy 0, policy_version 130412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:28,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43417.8, 300 sec: 44003.7). Total num frames: 1068343296. Throughput: 0: 11189.2. Samples: 17061330. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:28,977][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:30,328][626795] Updated weights for policy 0, policy_version 130422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:32,051][626795] Updated weights for policy 0, policy_version 130432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:33,620][626795] Updated weights for policy 0, policy_version 130442 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:33,975][24592] Fps is (10 sec: 47516.1, 60 sec: 43554.1, 300 sec: 44042.4). Total num frames: 1068589056. Throughput: 0: 11187.1. Samples: 17132838. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:33,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:35,498][626795] Updated weights for policy 0, policy_version 130452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:37,214][626795] Updated weights for policy 0, policy_version 130462 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:38,959][626795] Updated weights for policy 0, policy_version 130472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:38,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45465.6, 300 sec: 44042.8). Total num frames: 1068826624. Throughput: 0: 11193.9. Samples: 17204268. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:38,976][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:40,670][626795] Updated weights for policy 0, policy_version 130482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:42,371][626795] Updated weights for policy 0, policy_version 130492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45602.0, 300 sec: 44042.4). Total num frames: 1069064192. Throughput: 0: 11194.8. Samples: 17239656. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:43,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:46,587][626795] Updated weights for policy 0, policy_version 130502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:48,342][626795] Updated weights for policy 0, policy_version 130512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:48,975][24592] Fps is (10 sec: 35225.4, 60 sec: 43417.6, 300 sec: 43625.9). Total num frames: 1069178880. Throughput: 0: 10543.2. Samples: 17281488. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:48,977][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:50,047][626795] Updated weights for policy 0, policy_version 130522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:51,836][626795] Updated weights for policy 0, policy_version 130532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:53,554][626795] Updated weights for policy 0, policy_version 130542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:53,976][24592] Fps is (10 sec: 36043.1, 60 sec: 43553.7, 300 sec: 43653.8). Total num frames: 1069424640. Throughput: 0: 10547.3. Samples: 17352498. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:53,977][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:55,206][626795] Updated weights for policy 0, policy_version 130552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:56,944][626795] Updated weights for policy 0, policy_version 130562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:47:58,697][626795] Updated weights for policy 0, policy_version 130572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:58,975][24592] Fps is (10 sec: 48333.1, 60 sec: 43554.3, 300 sec: 43653.6). Total num frames: 1069662208. Throughput: 0: 11159.9. Samples: 17388240. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:47:58,976][24592] Avg episode reward: [(0, '4.310')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:00,334][626795] Updated weights for policy 0, policy_version 130582 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:02,082][626795] Updated weights for policy 0, policy_version 130592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:03,880][626795] Updated weights for policy 0, policy_version 130602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:03,975][24592] Fps is (10 sec: 46696.8, 60 sec: 43554.1, 300 sec: 44042.4). Total num frames: 1069891584. Throughput: 0: 11224.4. Samples: 17459952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:03,977][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:05,489][626795] Updated weights for policy 0, policy_version 130612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:07,174][626795] Updated weights for policy 0, policy_version 130622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:08,859][626795] Updated weights for policy 0, policy_version 130632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:08,976][24592] Fps is (10 sec: 47512.8, 60 sec: 43554.2, 300 sec: 44070.2). Total num frames: 1070137344. Throughput: 0: 11243.7. Samples: 17532396. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:08,976][24592] Avg episode reward: [(0, '4.330')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:10,626][626795] Updated weights for policy 0, policy_version 130642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:12,294][626795] Updated weights for policy 0, policy_version 130652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:13,975][24592] Fps is (10 sec: 48333.2, 60 sec: 45602.1, 300 sec: 44070.2). Total num frames: 1070374912. Throughput: 0: 11259.7. Samples: 17568018. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:13,976][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:13,994][626795] Updated weights for policy 0, policy_version 130662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:15,753][626795] Updated weights for policy 0, policy_version 130672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:17,535][626795] Updated weights for policy 0, policy_version 130682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:21,429][24592] Fps is (10 sec: 37493.6, 60 sec: 43679.0, 300 sec: 43706.6). Total num frames: 1070604288. Throughput: 0: 10676.6. Samples: 17639484. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:21,431][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:21,883][626795] Updated weights for policy 0, policy_version 130692 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:23,568][626795] Updated weights for policy 0, policy_version 130702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:23,975][24592] Fps is (10 sec: 35225.5, 60 sec: 43554.6, 300 sec: 43653.7). Total num frames: 1070727168. Throughput: 0: 10558.1. Samples: 17679384. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:23,976][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:25,255][626795] Updated weights for policy 0, policy_version 130712 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:27,020][626795] Updated weights for policy 0, policy_version 130722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:28,711][626795] Updated weights for policy 0, policy_version 130732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:28,975][24592] Fps is (10 sec: 47767.9, 60 sec: 43690.7, 300 sec: 43653.7). Total num frames: 1070964736. Throughput: 0: 10553.5. Samples: 17714562. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:28,976][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:30,434][626795] Updated weights for policy 0, policy_version 130742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:32,182][626795] Updated weights for policy 0, policy_version 130752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:33,773][626795] Updated weights for policy 0, policy_version 130762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:33,975][24592] Fps is (10 sec: 47513.9, 60 sec: 43554.2, 300 sec: 43681.4). Total num frames: 1071202304. Throughput: 0: 11223.5. Samples: 17786544. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:33,976][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:35,589][626795] Updated weights for policy 0, policy_version 130772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:37,317][626795] Updated weights for policy 0, policy_version 130782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:38,922][626795] Updated weights for policy 0, policy_version 130792 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:38,976][24592] Fps is (10 sec: 48331.6, 60 sec: 43690.5, 300 sec: 44098.0). Total num frames: 1071448064. Throughput: 0: 11262.5. Samples: 17859306. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:38,976][24592] Avg episode reward: [(0, '4.358')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:40,724][626795] Updated weights for policy 0, policy_version 130802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:42,323][626795] Updated weights for policy 0, policy_version 130812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:43,976][24592] Fps is (10 sec: 48330.2, 60 sec: 43690.4, 300 sec: 44125.6). Total num frames: 1071685632. Throughput: 0: 11260.8. Samples: 17894982. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:43,978][24592] Avg episode reward: [(0, '4.369')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:44,085][626795] Updated weights for policy 0, policy_version 130822 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:45,859][626795] Updated weights for policy 0, policy_version 130832 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:47,425][626795] Updated weights for policy 0, policy_version 130842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:48,975][24592] Fps is (10 sec: 47514.7, 60 sec: 45738.7, 300 sec: 44098.0). Total num frames: 1071923200. Throughput: 0: 11262.8. Samples: 17966778. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:48,977][24592] Avg episode reward: [(0, '4.158')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:49,285][626795] Updated weights for policy 0, policy_version 130852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:51,083][626795] Updated weights for policy 0, policy_version 130862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:52,837][626795] Updated weights for policy 0, policy_version 130872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:56,443][24592] Fps is (10 sec: 36138.9, 60 sec: 43407.5, 300 sec: 43649.6). Total num frames: 1072136192. Throughput: 0: 10618.9. Samples: 18036450. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:56,445][24592] Avg episode reward: [(0, '4.355')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:57,345][626795] Updated weights for policy 0, policy_version 130882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:58,976][24592] Fps is (10 sec: 33586.5, 60 sec: 43280.9, 300 sec: 43653.6). Total num frames: 1072259072. Throughput: 0: 10468.0. Samples: 18039078. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:48:58,979][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:48:59,169][626795] Updated weights for policy 0, policy_version 130892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:00,884][626795] Updated weights for policy 0, policy_version 130902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:02,723][626795] Updated weights for policy 0, policy_version 130912 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:03,975][24592] Fps is (10 sec: 46767.3, 60 sec: 43281.1, 300 sec: 43653.6). Total num frames: 1072488448. Throughput: 0: 11011.3. Samples: 18107970. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:03,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000130919_1072488448.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:04,041][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000129636_1061978112.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:04,447][626795] Updated weights for policy 0, policy_version 130922 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:06,229][626795] Updated weights for policy 0, policy_version 130932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:07,881][626795] Updated weights for policy 0, policy_version 130942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:08,975][24592] Fps is (10 sec: 46694.8, 60 sec: 43144.6, 300 sec: 43653.6). Total num frames: 1072726016. Throughput: 0: 11087.3. Samples: 18178314. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:08,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:09,651][626795] Updated weights for policy 0, policy_version 130952 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:11,348][626795] Updated weights for policy 0, policy_version 130962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:13,066][626795] Updated weights for policy 0, policy_version 130972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:13,975][24592] Fps is (10 sec: 47513.7, 60 sec: 43144.5, 300 sec: 44070.2). Total num frames: 1072963584. Throughput: 0: 11099.7. Samples: 18214050. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:13,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:14,762][626795] Updated weights for policy 0, policy_version 130982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:16,406][626795] Updated weights for policy 0, policy_version 130992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:18,072][626795] Updated weights for policy 0, policy_version 131002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:18,975][24592] Fps is (10 sec: 48333.2, 60 sec: 45269.2, 300 sec: 44070.2). Total num frames: 1073209344. Throughput: 0: 11133.9. Samples: 18287568. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:18,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:19,752][626795] Updated weights for policy 0, policy_version 131012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:21,402][626795] Updated weights for policy 0, policy_version 131022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:23,227][626795] Updated weights for policy 0, policy_version 131032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:23,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45329.1, 300 sec: 44098.0). Total num frames: 1073446912. Throughput: 0: 11119.6. Samples: 18359688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:23,977][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:24,875][626795] Updated weights for policy 0, policy_version 131042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:26,670][626795] Updated weights for policy 0, policy_version 131052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:28,400][626795] Updated weights for policy 0, policy_version 131062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:31,330][24592] Fps is (10 sec: 37131.6, 60 sec: 43354.5, 300 sec: 43721.2). Total num frames: 1073668096. Throughput: 0: 10563.2. Samples: 18395196. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:31,336][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:32,870][626795] Updated weights for policy 0, policy_version 131072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:33,975][24592] Fps is (10 sec: 33587.5, 60 sec: 43008.0, 300 sec: 43625.9). Total num frames: 1073782784. Throughput: 0: 10368.1. Samples: 18433344. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:33,976][24592] Avg episode reward: [(0, '4.162')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:34,658][626795] Updated weights for policy 0, policy_version 131082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:36,454][626795] Updated weights for policy 0, policy_version 131092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:38,146][626795] Updated weights for policy 0, policy_version 131102 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:38,975][24592] Fps is (10 sec: 46075.3, 60 sec: 42871.6, 300 sec: 43653.7). Total num frames: 1074020352. Throughput: 0: 10982.0. Samples: 18503538. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:38,977][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:39,802][626795] Updated weights for policy 0, policy_version 131112 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:41,515][626795] Updated weights for policy 0, policy_version 131122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:43,184][626795] Updated weights for policy 0, policy_version 131132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:43,975][24592] Fps is (10 sec: 48332.1, 60 sec: 43008.3, 300 sec: 43681.4). Total num frames: 1074266112. Throughput: 0: 11130.6. Samples: 18539952. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:43,976][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:44,836][626795] Updated weights for policy 0, policy_version 131142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:46,626][626795] Updated weights for policy 0, policy_version 131152 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:48,251][626795] Updated weights for policy 0, policy_version 131162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:48,975][24592] Fps is (10 sec: 49151.7, 60 sec: 43144.5, 300 sec: 44097.9). Total num frames: 1074511872. Throughput: 0: 11222.5. Samples: 18612984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:48,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:49,965][626795] Updated weights for policy 0, policy_version 131172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:51,642][626795] Updated weights for policy 0, policy_version 131182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:53,511][626795] Updated weights for policy 0, policy_version 131192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:53,976][24592] Fps is (10 sec: 48329.0, 60 sec: 45421.8, 300 sec: 44125.6). Total num frames: 1074749440. Throughput: 0: 11255.4. Samples: 18684816. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:53,977][24592] Avg episode reward: [(0, '4.339')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:55,131][626795] Updated weights for policy 0, policy_version 131202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:56,926][626795] Updated weights for policy 0, policy_version 131212 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:49:58,670][626795] Updated weights for policy 0, policy_version 131222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:58,975][24592] Fps is (10 sec: 46694.6, 60 sec: 45329.2, 300 sec: 44098.0). Total num frames: 1074978816. Throughput: 0: 11243.1. Samples: 18719988. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:49:58,977][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:00,468][626795] Updated weights for policy 0, policy_version 131232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:02,188][626795] Updated weights for policy 0, policy_version 131242 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:06,251][24592] Fps is (10 sec: 36038.7, 60 sec: 43409.6, 300 sec: 43677.7). Total num frames: 1075191808. Throughput: 0: 10619.8. Samples: 18789624. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:06,253][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:06,657][626795] Updated weights for policy 0, policy_version 131252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:08,464][626795] Updated weights for policy 0, policy_version 131262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:08,976][24592] Fps is (10 sec: 34405.6, 60 sec: 43281.0, 300 sec: 43653.6). Total num frames: 1075322880. Throughput: 0: 10398.5. Samples: 18827622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:08,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:10,140][626795] Updated weights for policy 0, policy_version 131272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:11,818][626795] Updated weights for policy 0, policy_version 131282 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:13,621][626795] Updated weights for policy 0, policy_version 131292 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:13,975][24592] Fps is (10 sec: 46664.1, 60 sec: 43144.6, 300 sec: 43653.7). Total num frames: 1075552256. Throughput: 0: 10979.5. Samples: 18863418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:13,976][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:15,392][626795] Updated weights for policy 0, policy_version 131302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:16,991][626795] Updated weights for policy 0, policy_version 131312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:18,595][626795] Updated weights for policy 0, policy_version 131322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:18,975][24592] Fps is (10 sec: 47514.5, 60 sec: 43144.5, 300 sec: 43681.4). Total num frames: 1075798016. Throughput: 0: 11171.4. Samples: 18936060. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:18,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:20,348][626795] Updated weights for policy 0, policy_version 131332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:21,959][626795] Updated weights for policy 0, policy_version 131342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:23,710][626795] Updated weights for policy 0, policy_version 131352 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:23,975][24592] Fps is (10 sec: 49152.1, 60 sec: 43281.1, 300 sec: 44098.0). Total num frames: 1076043776. Throughput: 0: 11231.7. Samples: 19008966. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:23,976][24592] Avg episode reward: [(0, '4.366')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:25,449][626795] Updated weights for policy 0, policy_version 131362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:27,085][626795] Updated weights for policy 0, policy_version 131372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:28,728][626795] Updated weights for policy 0, policy_version 131382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:28,975][24592] Fps is (10 sec: 49151.8, 60 sec: 45475.4, 300 sec: 44125.8). Total num frames: 1076289536. Throughput: 0: 11234.9. Samples: 19045524. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:28,977][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:30,578][626795] Updated weights for policy 0, policy_version 131392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:32,335][626795] Updated weights for policy 0, policy_version 131402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:33,975][24592] Fps is (10 sec: 47513.2, 60 sec: 45602.1, 300 sec: 44125.8). Total num frames: 1076518912. Throughput: 0: 11171.5. Samples: 19115700. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:33,977][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:34,087][626795] Updated weights for policy 0, policy_version 131412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:35,878][626795] Updated weights for policy 0, policy_version 131422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:37,480][626795] Updated weights for policy 0, policy_version 131432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:41,335][24592] Fps is (10 sec: 35127.8, 60 sec: 43350.9, 300 sec: 43665.4). Total num frames: 1076723712. Throughput: 0: 9839.9. Samples: 19150824. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:41,337][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:42,312][626795] Updated weights for policy 0, policy_version 131442 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:42,321][626772] Signal inference workers to stop experience collection... (300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:42,329][626772] Signal inference workers to resume experience collection... (300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:42,337][626795] InferenceWorker_p0-w0: stopping experience collection (300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:42,344][626795] InferenceWorker_p0-w0: resuming experience collection (300 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:43,927][626795] Updated weights for policy 0, policy_version 131452 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:43,975][24592] Fps is (10 sec: 33587.3, 60 sec: 43144.6, 300 sec: 43681.4). Total num frames: 1076854784. Throughput: 0: 10397.6. Samples: 19187880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:43,976][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:45,616][626795] Updated weights for policy 0, policy_version 131462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:47,387][626795] Updated weights for policy 0, policy_version 131472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:48,976][24592] Fps is (10 sec: 48249.3, 60 sec: 43007.8, 300 sec: 43681.4). Total num frames: 1077092352. Throughput: 0: 10977.5. Samples: 19258632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:48,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:49,135][626795] Updated weights for policy 0, policy_version 131482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:50,840][626795] Updated weights for policy 0, policy_version 131492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:52,434][626795] Updated weights for policy 0, policy_version 131502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:53,976][24592] Fps is (10 sec: 47512.0, 60 sec: 43008.4, 300 sec: 43709.1). Total num frames: 1077329920. Throughput: 0: 11176.6. Samples: 19330572. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:53,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:54,217][626795] Updated weights for policy 0, policy_version 131512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:55,913][626795] Updated weights for policy 0, policy_version 131522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:57,606][626795] Updated weights for policy 0, policy_version 131532 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:58,975][24592] Fps is (10 sec: 48334.2, 60 sec: 43281.1, 300 sec: 44125.7). Total num frames: 1077575680. Throughput: 0: 11196.8. Samples: 19367274. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:50:58,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:50:59,227][626795] Updated weights for policy 0, policy_version 131542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:00,925][626795] Updated weights for policy 0, policy_version 131552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:02,578][626795] Updated weights for policy 0, policy_version 131562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:03,976][24592] Fps is (10 sec: 48332.6, 60 sec: 45412.8, 300 sec: 44125.7). Total num frames: 1077813248. Throughput: 0: 11203.0. Samples: 19440198. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:03,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000131569_1077813248.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000130283_1067278336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:04,373][626795] Updated weights for policy 0, policy_version 131572 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:06,128][626795] Updated weights for policy 0, policy_version 131582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:08,000][626795] Updated weights for policy 0, policy_version 131592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:08,975][24592] Fps is (10 sec: 46694.5, 60 sec: 45329.3, 300 sec: 44098.0). Total num frames: 1078042624. Throughput: 0: 11113.5. Samples: 19509072. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:08,977][24592] Avg episode reward: [(0, '4.405')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:09,685][626795] Updated weights for policy 0, policy_version 131602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:11,451][626795] Updated weights for policy 0, policy_version 131612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:13,297][626795] Updated weights for policy 0, policy_version 131622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:16,264][24592] Fps is (10 sec: 36664.7, 60 sec: 43531.8, 300 sec: 43703.3). Total num frames: 1078263808. Throughput: 0: 10554.0. Samples: 19544610. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:16,266][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:17,634][626795] Updated weights for policy 0, policy_version 131632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:18,975][24592] Fps is (10 sec: 34406.4, 60 sec: 43144.6, 300 sec: 43653.6). Total num frames: 1078386688. Throughput: 0: 10378.2. Samples: 19582716. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:18,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:19,373][626795] Updated weights for policy 0, policy_version 131642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:21,129][626795] Updated weights for policy 0, policy_version 131652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:22,821][626795] Updated weights for policy 0, policy_version 131662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:23,975][24592] Fps is (10 sec: 47807.3, 60 sec: 43144.5, 300 sec: 43709.2). Total num frames: 1078632448. Throughput: 0: 11823.3. Samples: 19654968. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:23,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:24,489][626795] Updated weights for policy 0, policy_version 131672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:26,201][626795] Updated weights for policy 0, policy_version 131682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:27,878][626795] Updated weights for policy 0, policy_version 131692 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:28,976][24592] Fps is (10 sec: 48330.9, 60 sec: 43007.8, 300 sec: 43709.1). Total num frames: 1078870016. Throughput: 0: 11186.0. Samples: 19691256. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:28,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:29,510][626795] Updated weights for policy 0, policy_version 131702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:31,185][626795] Updated weights for policy 0, policy_version 131712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:32,874][626795] Updated weights for policy 0, policy_version 131722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:33,975][24592] Fps is (10 sec: 48332.2, 60 sec: 43281.0, 300 sec: 44125.7). Total num frames: 1079115776. Throughput: 0: 11232.0. Samples: 19764072. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:33,978][24592] Avg episode reward: [(0, '4.124')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:34,590][626795] Updated weights for policy 0, policy_version 131732 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:36,286][626795] Updated weights for policy 0, policy_version 131742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:37,938][626795] Updated weights for policy 0, policy_version 131752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:38,975][24592] Fps is (10 sec: 48334.7, 60 sec: 45621.6, 300 sec: 44153.5). Total num frames: 1079353344. Throughput: 0: 11264.2. Samples: 19837458. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:38,976][24592] Avg episode reward: [(0, '4.239')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:39,696][626795] Updated weights for policy 0, policy_version 131762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:41,364][626795] Updated weights for policy 0, policy_version 131772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:43,126][626795] Updated weights for policy 0, policy_version 131782 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:43,975][24592] Fps is (10 sec: 47514.3, 60 sec: 45602.2, 300 sec: 44125.7). Total num frames: 1079590912. Throughput: 0: 11232.5. Samples: 19872738. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:43,977][24592] Avg episode reward: [(0, '4.340')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:44,890][626795] Updated weights for policy 0, policy_version 131792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:46,572][626795] Updated weights for policy 0, policy_version 131802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:48,489][626795] Updated weights for policy 0, policy_version 131812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:51,408][24592] Fps is (10 sec: 36241.3, 60 sec: 43432.1, 300 sec: 43682.3). Total num frames: 1079803904. Throughput: 0: 10603.8. Samples: 19943154. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:51,409][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:53,216][626795] Updated weights for policy 0, policy_version 131822 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:53,975][24592] Fps is (10 sec: 31948.2, 60 sec: 43008.1, 300 sec: 43598.1). Total num frames: 1079910400. Throughput: 0: 10397.4. Samples: 19976958. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:53,976][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:55,031][626795] Updated weights for policy 0, policy_version 131832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:56,965][626795] Updated weights for policy 0, policy_version 131842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:51:58,828][626795] Updated weights for policy 0, policy_version 131852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:58,976][24592] Fps is (10 sec: 44379.1, 60 sec: 42734.6, 300 sec: 43598.0). Total num frames: 1080139776. Throughput: 0: 10899.0. Samples: 20010120. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:51:58,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:00,627][626795] Updated weights for policy 0, policy_version 131862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:02,397][626795] Updated weights for policy 0, policy_version 131872 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:03,976][24592] Fps is (10 sec: 45055.5, 60 sec: 42461.9, 300 sec: 43514.8). Total num frames: 1080360960. Throughput: 0: 10982.1. Samples: 20076912. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:03,978][24592] Avg episode reward: [(0, '4.360')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:04,268][626795] Updated weights for policy 0, policy_version 131882 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:06,227][626795] Updated weights for policy 0, policy_version 131892 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:08,173][626795] Updated weights for policy 0, policy_version 131902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:08,976][24592] Fps is (10 sec: 42599.0, 60 sec: 42052.0, 300 sec: 43820.2). Total num frames: 1080565760. Throughput: 0: 10786.6. Samples: 20140368. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:08,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:10,279][626795] Updated weights for policy 0, policy_version 131912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:12,240][626795] Updated weights for policy 0, policy_version 131922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:13,976][24592] Fps is (10 sec: 40959.2, 60 sec: 43436.0, 300 sec: 43709.1). Total num frames: 1080770560. Throughput: 0: 10662.4. Samples: 20171064. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:13,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:14,230][626795] Updated weights for policy 0, policy_version 131932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:16,385][626795] Updated weights for policy 0, policy_version 131942 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:18,302][626795] Updated weights for policy 0, policy_version 131952 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:18,976][24592] Fps is (10 sec: 40958.9, 60 sec: 43144.1, 300 sec: 43598.1). Total num frames: 1080975360. Throughput: 0: 10378.0. Samples: 20231088. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:18,977][24592] Avg episode reward: [(0, '4.327')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:20,349][626795] Updated weights for policy 0, policy_version 131962 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:22,436][626795] Updated weights for policy 0, policy_version 131972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:23,239][626795] Updated weights for policy 0, policy_version 131982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:23,976][24592] Fps is (10 sec: 45054.2, 60 sec: 43143.9, 300 sec: 43653.5). Total num frames: 1081221120. Throughput: 0: 10112.1. Samples: 20292510. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:23,981][24592] Avg episode reward: [(0, '4.150')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:25,223][626795] Updated weights for policy 0, policy_version 131992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:27,153][626795] Updated weights for policy 0, policy_version 132002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:28,969][626795] Updated weights for policy 0, policy_version 132012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:28,976][24592] Fps is (10 sec: 46695.2, 60 sec: 42871.5, 300 sec: 43570.3). Total num frames: 1081442304. Throughput: 0: 10281.9. Samples: 20335428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:28,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:30,801][626795] Updated weights for policy 0, policy_version 132022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:32,470][626795] Updated weights for policy 0, policy_version 132032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:33,976][24592] Fps is (10 sec: 45058.6, 60 sec: 42598.3, 300 sec: 43542.5). Total num frames: 1081671680. Throughput: 0: 10822.8. Samples: 20403858. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:33,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:34,276][626795] Updated weights for policy 0, policy_version 132042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:35,897][626795] Updated weights for policy 0, policy_version 132052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:37,690][626795] Updated weights for policy 0, policy_version 132062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:38,976][24592] Fps is (10 sec: 46691.6, 60 sec: 42597.7, 300 sec: 43542.4). Total num frames: 1081909248. Throughput: 0: 11069.8. Samples: 20475108. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:38,979][24592] Avg episode reward: [(0, '4.277')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:39,393][626795] Updated weights for policy 0, policy_version 132072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:41,171][626795] Updated weights for policy 0, policy_version 132082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:42,804][626795] Updated weights for policy 0, policy_version 132092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:43,975][24592] Fps is (10 sec: 47515.0, 60 sec: 42598.4, 300 sec: 43959.1). Total num frames: 1082146816. Throughput: 0: 11127.7. Samples: 20510862. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:43,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:44,503][626795] Updated weights for policy 0, policy_version 132102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:46,298][626795] Updated weights for policy 0, policy_version 132112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:48,061][626795] Updated weights for policy 0, policy_version 132122 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:48,976][24592] Fps is (10 sec: 46697.7, 60 sec: 44682.5, 300 sec: 43903.6). Total num frames: 1082376192. Throughput: 0: 11204.8. Samples: 20581128. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:48,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:50,083][626795] Updated weights for policy 0, policy_version 132132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:51,922][626795] Updated weights for policy 0, policy_version 132142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:53,737][626795] Updated weights for policy 0, policy_version 132152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:53,975][24592] Fps is (10 sec: 45055.8, 60 sec: 44783.0, 300 sec: 43848.0). Total num frames: 1082597376. Throughput: 0: 11255.3. Samples: 20646852. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:53,977][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:56,915][626795] Updated weights for policy 0, policy_version 132162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:52:58,676][626795] Updated weights for policy 0, policy_version 132172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:58,975][24592] Fps is (10 sec: 37684.1, 60 sec: 43554.4, 300 sec: 43598.1). Total num frames: 1082753024. Throughput: 0: 11001.4. Samples: 20666124. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:52:58,976][24592] Avg episode reward: [(0, '4.231')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:00,539][626795] Updated weights for policy 0, policy_version 132182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:02,467][626795] Updated weights for policy 0, policy_version 132192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:03,975][24592] Fps is (10 sec: 38502.6, 60 sec: 43690.9, 300 sec: 43542.6). Total num frames: 1082982400. Throughput: 0: 11136.5. Samples: 20732226. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:03,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:03,996][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000132201_1082990592.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000130919_1072488448.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:04,215][626795] Updated weights for policy 0, policy_version 132202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:05,912][626795] Updated weights for policy 0, policy_version 132212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:07,666][626795] Updated weights for policy 0, policy_version 132222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:08,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44237.0, 300 sec: 43542.6). Total num frames: 1083219968. Throughput: 0: 11355.8. Samples: 20803512. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:08,978][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:09,298][626795] Updated weights for policy 0, policy_version 132232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:11,108][626795] Updated weights for policy 0, policy_version 132242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:12,831][626795] Updated weights for policy 0, policy_version 132252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:13,976][24592] Fps is (10 sec: 47512.2, 60 sec: 44783.1, 300 sec: 43935.8). Total num frames: 1083457536. Throughput: 0: 11196.6. Samples: 20839272. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:13,977][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:14,623][626795] Updated weights for policy 0, policy_version 132262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:16,180][626795] Updated weights for policy 0, policy_version 132272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:17,972][626795] Updated weights for policy 0, policy_version 132282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:18,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45329.5, 300 sec: 43959.1). Total num frames: 1083695104. Throughput: 0: 11261.4. Samples: 20910618. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:18,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:19,803][626795] Updated weights for policy 0, policy_version 132292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:21,556][626795] Updated weights for policy 0, policy_version 132302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:23,412][626795] Updated weights for policy 0, policy_version 132312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:23,975][24592] Fps is (10 sec: 46695.6, 60 sec: 45056.6, 300 sec: 43931.3). Total num frames: 1083924480. Throughput: 0: 11200.4. Samples: 20979114. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:23,977][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:25,112][626795] Updated weights for policy 0, policy_version 132322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:26,943][626795] Updated weights for policy 0, policy_version 132332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:29,945][24592] Fps is (10 sec: 38086.5, 60 sec: 43802.3, 300 sec: 43621.3). Total num frames: 1084112896. Throughput: 0: 10928.0. Samples: 21013218. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:29,946][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:30,644][626795] Updated weights for policy 0, policy_version 132342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:32,457][626795] Updated weights for policy 0, policy_version 132352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:33,976][24592] Fps is (10 sec: 36863.8, 60 sec: 43690.8, 300 sec: 43542.6). Total num frames: 1084293120. Throughput: 0: 10628.7. Samples: 21059418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:33,978][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:34,305][626795] Updated weights for policy 0, policy_version 132362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:36,207][626795] Updated weights for policy 0, policy_version 132372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:38,081][626795] Updated weights for policy 0, policy_version 132382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:38,975][24592] Fps is (10 sec: 45357.9, 60 sec: 43554.9, 300 sec: 43514.9). Total num frames: 1084522496. Throughput: 0: 10670.4. Samples: 21127020. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:38,976][24592] Avg episode reward: [(0, '4.255')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:39,658][626795] Updated weights for policy 0, policy_version 132392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:41,440][626795] Updated weights for policy 0, policy_version 132402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:43,141][626795] Updated weights for policy 0, policy_version 132412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:43,975][24592] Fps is (10 sec: 46694.9, 60 sec: 43554.1, 300 sec: 43514.8). Total num frames: 1084760064. Throughput: 0: 11042.5. Samples: 21163038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:43,976][24592] Avg episode reward: [(0, '4.327')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:44,940][626795] Updated weights for policy 0, policy_version 132422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:46,543][626795] Updated weights for policy 0, policy_version 132432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:48,314][626795] Updated weights for policy 0, policy_version 132442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:48,975][24592] Fps is (10 sec: 46693.9, 60 sec: 43554.3, 300 sec: 43937.9). Total num frames: 1084989440. Throughput: 0: 11156.5. Samples: 21234270. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:48,976][24592] Avg episode reward: [(0, '4.210')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:49,966][626795] Updated weights for policy 0, policy_version 132452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:51,772][626795] Updated weights for policy 0, policy_version 132462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:53,476][626795] Updated weights for policy 0, policy_version 132472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43827.2, 300 sec: 43959.1). Total num frames: 1085227008. Throughput: 0: 11155.3. Samples: 21305502. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:53,978][24592] Avg episode reward: [(0, '4.221')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:55,183][626795] Updated weights for policy 0, policy_version 132482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:56,953][626795] Updated weights for policy 0, policy_version 132492 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:53:58,700][626795] Updated weights for policy 0, policy_version 132502 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:58,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45192.5, 300 sec: 43986.9). Total num frames: 1085464576. Throughput: 0: 11146.2. Samples: 21340848. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:53:58,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:00,605][626795] Updated weights for policy 0, policy_version 132512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:04,135][24592] Fps is (10 sec: 37091.6, 60 sec: 43574.8, 300 sec: 43630.1). Total num frames: 1085603840. Throughput: 0: 10287.1. Samples: 21375180. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:04,136][24592] Avg episode reward: [(0, '4.190')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:04,413][626795] Updated weights for policy 0, policy_version 132522 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:06,297][626795] Updated weights for policy 0, policy_version 132532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:07,922][626795] Updated weights for policy 0, policy_version 132542 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:08,975][24592] Fps is (10 sec: 35225.5, 60 sec: 43281.0, 300 sec: 43570.3). Total num frames: 1085816832. Throughput: 0: 10536.9. Samples: 21453276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:08,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:09,834][626795] Updated weights for policy 0, policy_version 132552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:11,720][626795] Updated weights for policy 0, policy_version 132562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:13,453][626795] Updated weights for policy 0, policy_version 132572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:13,975][24592] Fps is (10 sec: 45786.2, 60 sec: 43281.2, 300 sec: 43542.6). Total num frames: 1086054400. Throughput: 0: 10770.3. Samples: 21487440. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:13,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:15,138][626795] Updated weights for policy 0, policy_version 132582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:16,888][626795] Updated weights for policy 0, policy_version 132592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:18,492][626795] Updated weights for policy 0, policy_version 132602 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:18,975][24592] Fps is (10 sec: 47513.9, 60 sec: 43281.1, 300 sec: 43542.6). Total num frames: 1086291968. Throughput: 0: 11095.9. Samples: 21558732. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:18,976][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:20,280][626795] Updated weights for policy 0, policy_version 132612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:21,994][626795] Updated weights for policy 0, policy_version 132622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:23,753][626795] Updated weights for policy 0, policy_version 132632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:23,976][24592] Fps is (10 sec: 47512.7, 60 sec: 43417.5, 300 sec: 43948.9). Total num frames: 1086529536. Throughput: 0: 11188.3. Samples: 21630498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:23,976][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:25,376][626795] Updated weights for policy 0, policy_version 132642 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:27,128][626795] Updated weights for policy 0, policy_version 132652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:28,814][626795] Updated weights for policy 0, policy_version 132662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44963.4, 300 sec: 44014.6). Total num frames: 1086767104. Throughput: 0: 11176.5. Samples: 21665982. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:28,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:30,527][626795] Updated weights for policy 0, policy_version 132672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:32,251][626795] Updated weights for policy 0, policy_version 132682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:33,975][24592] Fps is (10 sec: 46695.4, 60 sec: 45056.1, 300 sec: 43986.9). Total num frames: 1086996480. Throughput: 0: 11176.3. Samples: 21737202. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:33,977][24592] Avg episode reward: [(0, '4.361')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:34,166][626795] Updated weights for policy 0, policy_version 132692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:36,145][626795] Updated weights for policy 0, policy_version 132702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:38,975][24592] Fps is (10 sec: 36044.6, 60 sec: 43417.5, 300 sec: 43598.1). Total num frames: 1087127552. Throughput: 0: 10566.9. Samples: 21781014. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:38,976][24592] Avg episode reward: [(0, '4.319')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:39,832][626795] Updated weights for policy 0, policy_version 132712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:41,761][626795] Updated weights for policy 0, policy_version 132722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:43,619][626795] Updated weights for policy 0, policy_version 132732 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:43,975][24592] Fps is (10 sec: 35225.6, 60 sec: 43144.5, 300 sec: 43514.8). Total num frames: 1087348736. Throughput: 0: 10520.8. Samples: 21814284. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:43,976][24592] Avg episode reward: [(0, '4.283')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:45,368][626795] Updated weights for policy 0, policy_version 132742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:47,136][626795] Updated weights for policy 0, policy_version 132752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:48,880][626795] Updated weights for policy 0, policy_version 132762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:48,976][24592] Fps is (10 sec: 45874.6, 60 sec: 43281.0, 300 sec: 43514.9). Total num frames: 1087586304. Throughput: 0: 11331.9. Samples: 21883308. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:48,976][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:50,559][626795] Updated weights for policy 0, policy_version 132772 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:52,206][626795] Updated weights for policy 0, policy_version 132782 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:53,976][24592] Fps is (10 sec: 47510.4, 60 sec: 43280.6, 300 sec: 43542.5). Total num frames: 1087823872. Throughput: 0: 11148.5. Samples: 21954966. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:53,978][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:54,000][626795] Updated weights for policy 0, policy_version 132792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:55,737][626795] Updated weights for policy 0, policy_version 132802 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:57,363][626795] Updated weights for policy 0, policy_version 132812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:58,975][24592] Fps is (10 sec: 47514.3, 60 sec: 43281.1, 300 sec: 43965.0). Total num frames: 1088061440. Throughput: 0: 11172.7. Samples: 21990210. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:54:58,976][24592] Avg episode reward: [(0, '4.282')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:54:59,113][626795] Updated weights for policy 0, policy_version 132822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:00,952][626795] Updated weights for policy 0, policy_version 132832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:02,668][626795] Updated weights for policy 0, policy_version 132842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:03,975][24592] Fps is (10 sec: 47516.6, 60 sec: 45039.2, 300 sec: 43986.9). Total num frames: 1088299008. Throughput: 0: 11172.3. Samples: 22061484. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:03,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:04,023][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000132850_1088307200.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:04,079][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000131569_1077813248.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:04,377][626795] Updated weights for policy 0, policy_version 132852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:06,161][626795] Updated weights for policy 0, policy_version 132862 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:08,024][626795] Updated weights for policy 0, policy_version 132872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:08,975][24592] Fps is (10 sec: 46694.7, 60 sec: 45192.6, 300 sec: 43986.9). Total num frames: 1088528384. Throughput: 0: 11104.9. Samples: 22130214. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:08,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:09,840][626795] Updated weights for policy 0, policy_version 132882 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:13,654][626795] Updated weights for policy 0, policy_version 132892 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:13,975][24592] Fps is (10 sec: 36044.8, 60 sec: 43417.6, 300 sec: 43598.1). Total num frames: 1088659456. Throughput: 0: 10820.4. Samples: 22152900. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:13,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:15,469][626795] Updated weights for policy 0, policy_version 132902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:17,336][626795] Updated weights for policy 0, policy_version 132912 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:18,975][24592] Fps is (10 sec: 36044.7, 60 sec: 43281.1, 300 sec: 43542.6). Total num frames: 1088888832. Throughput: 0: 10465.9. Samples: 22208166. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:18,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:19,134][626795] Updated weights for policy 0, policy_version 132922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:20,830][626795] Updated weights for policy 0, policy_version 132932 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:22,602][626795] Updated weights for policy 0, policy_version 132942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:23,975][24592] Fps is (10 sec: 45875.1, 60 sec: 43144.6, 300 sec: 43487.0). Total num frames: 1089118208. Throughput: 0: 11050.3. Samples: 22278276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:23,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:24,339][626795] Updated weights for policy 0, policy_version 132952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:26,110][626795] Updated weights for policy 0, policy_version 132962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:27,957][626795] Updated weights for policy 0, policy_version 132972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:28,975][24592] Fps is (10 sec: 45874.6, 60 sec: 43007.9, 300 sec: 43487.0). Total num frames: 1089347584. Throughput: 0: 11071.8. Samples: 22312518. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:28,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:29,750][626795] Updated weights for policy 0, policy_version 132982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:31,503][626795] Updated weights for policy 0, policy_version 132992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:33,306][626795] Updated weights for policy 0, policy_version 133002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:33,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43008.0, 300 sec: 43921.7). Total num frames: 1089576960. Throughput: 0: 11068.6. Samples: 22381392. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:33,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:35,081][626795] Updated weights for policy 0, policy_version 133012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:36,872][626795] Updated weights for policy 0, policy_version 133022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:38,690][626795] Updated weights for policy 0, policy_version 133032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:38,976][24592] Fps is (10 sec: 45874.5, 60 sec: 44646.2, 300 sec: 43903.5). Total num frames: 1089806336. Throughput: 0: 10991.7. Samples: 22449588. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:38,977][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:40,524][626795] Updated weights for policy 0, policy_version 133042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:42,188][626795] Updated weights for policy 0, policy_version 133052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:43,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44782.9, 300 sec: 43875.8). Total num frames: 1090035712. Throughput: 0: 10986.0. Samples: 22484580. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:43,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:44,073][626795] Updated weights for policy 0, policy_version 133062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:47,338][626795] Updated weights for policy 0, policy_version 133072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:48,975][24592] Fps is (10 sec: 39322.6, 60 sec: 43554.3, 300 sec: 43625.9). Total num frames: 1090199552. Throughput: 0: 10538.3. Samples: 22535706. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:48,976][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:49,148][626795] Updated weights for policy 0, policy_version 133082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:51,069][626795] Updated weights for policy 0, policy_version 133092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:52,856][626795] Updated weights for policy 0, policy_version 133102 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:53,976][24592] Fps is (10 sec: 38501.6, 60 sec: 43281.4, 300 sec: 43542.5). Total num frames: 1090420736. Throughput: 0: 10506.5. Samples: 22603008. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:53,978][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:54,593][626795] Updated weights for policy 0, policy_version 133112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:56,472][626795] Updated weights for policy 0, policy_version 133122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:55:58,196][626795] Updated weights for policy 0, policy_version 133132 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:58,976][24592] Fps is (10 sec: 45054.2, 60 sec: 43144.3, 300 sec: 43514.8). Total num frames: 1090650112. Throughput: 0: 10754.7. Samples: 22636866. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:55:58,977][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:00,051][626795] Updated weights for policy 0, policy_version 133142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:01,821][626795] Updated weights for policy 0, policy_version 133152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:03,614][626795] Updated weights for policy 0, policy_version 133162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:03,976][24592] Fps is (10 sec: 45055.8, 60 sec: 42871.3, 300 sec: 43487.0). Total num frames: 1090871296. Throughput: 0: 11049.8. Samples: 22705410. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:03,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:05,410][626795] Updated weights for policy 0, policy_version 133172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:07,047][626795] Updated weights for policy 0, policy_version 133182 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:08,830][626795] Updated weights for policy 0, policy_version 133192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:08,976][24592] Fps is (10 sec: 45876.7, 60 sec: 43007.9, 300 sec: 43883.1). Total num frames: 1091108864. Throughput: 0: 11046.9. Samples: 22775388. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:08,979][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:10,759][626795] Updated weights for policy 0, policy_version 133202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:12,592][626795] Updated weights for policy 0, policy_version 133212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:13,976][24592] Fps is (10 sec: 45875.2, 60 sec: 44509.7, 300 sec: 43875.8). Total num frames: 1091330048. Throughput: 0: 11042.0. Samples: 22809408. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:13,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:14,178][626795] Updated weights for policy 0, policy_version 133222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:16,016][626795] Updated weights for policy 0, policy_version 133232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:17,860][626795] Updated weights for policy 0, policy_version 133242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:20,176][24592] Fps is (10 sec: 39493.5, 60 sec: 43502.5, 300 sec: 43614.9). Total num frames: 1091551232. Throughput: 0: 10746.6. Samples: 22877898. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:20,177][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:21,249][626795] Updated weights for policy 0, policy_version 133252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:22,931][626795] Updated weights for policy 0, policy_version 133262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:23,976][24592] Fps is (10 sec: 39322.2, 60 sec: 43417.6, 300 sec: 43570.4). Total num frames: 1091723264. Throughput: 0: 10646.7. Samples: 22928688. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:23,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:24,825][626795] Updated weights for policy 0, policy_version 133272 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:26,515][626795] Updated weights for policy 0, policy_version 133282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:28,372][626795] Updated weights for policy 0, policy_version 133292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:28,975][24592] Fps is (10 sec: 45620.1, 60 sec: 43417.7, 300 sec: 43514.8). Total num frames: 1091952640. Throughput: 0: 10639.6. Samples: 22963362. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:28,977][24592] Avg episode reward: [(0, '4.348')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:30,177][626795] Updated weights for policy 0, policy_version 133302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:31,985][626795] Updated weights for policy 0, policy_version 133312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:33,851][626795] Updated weights for policy 0, policy_version 133322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:33,975][24592] Fps is (10 sec: 45875.5, 60 sec: 43417.6, 300 sec: 43487.0). Total num frames: 1092182016. Throughput: 0: 11006.4. Samples: 23030994. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:33,977][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:35,495][626795] Updated weights for policy 0, policy_version 133332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:37,334][626795] Updated weights for policy 0, policy_version 133342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:37,969][626772] Signal inference workers to stop experience collection... (350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:37,969][626772] Signal inference workers to resume experience collection... (350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:37,981][626795] InferenceWorker_p0-w0: stopping experience collection (350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:37,985][626795] InferenceWorker_p0-w0: resuming experience collection (350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:38,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43417.8, 300 sec: 43459.2). Total num frames: 1092411392. Throughput: 0: 11043.2. Samples: 23099952. Policy #0 lag: (min: 1.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:38,978][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:39,130][626795] Updated weights for policy 0, policy_version 133352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:40,889][626795] Updated weights for policy 0, policy_version 133362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:42,744][626795] Updated weights for policy 0, policy_version 133372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:43,976][24592] Fps is (10 sec: 45051.5, 60 sec: 43280.3, 300 sec: 43848.4). Total num frames: 1092632576. Throughput: 0: 11064.8. Samples: 23134788. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:43,979][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:44,397][626795] Updated weights for policy 0, policy_version 133382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:46,277][626795] Updated weights for policy 0, policy_version 133392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:48,159][626795] Updated weights for policy 0, policy_version 133402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:48,976][24592] Fps is (10 sec: 45871.2, 60 sec: 44509.2, 300 sec: 43931.2). Total num frames: 1092870144. Throughput: 0: 11048.4. Samples: 23202594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:48,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:49,915][626795] Updated weights for policy 0, policy_version 133412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:51,706][626795] Updated weights for policy 0, policy_version 133422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:53,993][24592] Fps is (10 sec: 39256.5, 60 sec: 43405.0, 300 sec: 43678.9). Total num frames: 1093025792. Throughput: 0: 10256.7. Samples: 23237118. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:53,994][24592] Avg episode reward: [(0, '4.333')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:55,031][626795] Updated weights for policy 0, policy_version 133432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:56,774][626795] Updated weights for policy 0, policy_version 133442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:56:58,612][626795] Updated weights for policy 0, policy_version 133452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:58,978][24592] Fps is (10 sec: 37678.0, 60 sec: 43279.7, 300 sec: 43681.1). Total num frames: 1093246976. Throughput: 0: 10633.3. Samples: 23287926. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:56:58,979][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:00,476][626795] Updated weights for policy 0, policy_version 133462 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:02,215][626795] Updated weights for policy 0, policy_version 133472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:03,975][24592] Fps is (10 sec: 45135.3, 60 sec: 43417.8, 300 sec: 43764.8). Total num frames: 1093476352. Throughput: 0: 10919.0. Samples: 23356140. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:03,976][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000133481_1093476352.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:04,034][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000132201_1082990592.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:04,088][626795] Updated weights for policy 0, policy_version 133482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:05,817][626795] Updated weights for policy 0, policy_version 133492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:07,703][626795] Updated weights for policy 0, policy_version 133502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:08,976][24592] Fps is (10 sec: 45883.3, 60 sec: 43280.8, 300 sec: 43848.0). Total num frames: 1093705728. Throughput: 0: 11009.6. Samples: 23424126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:08,977][24592] Avg episode reward: [(0, '4.285')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:09,415][626795] Updated weights for policy 0, policy_version 133512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:11,251][626795] Updated weights for policy 0, policy_version 133522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:12,904][626795] Updated weights for policy 0, policy_version 133532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:13,976][24592] Fps is (10 sec: 46692.4, 60 sec: 43554.0, 300 sec: 43959.1). Total num frames: 1093943296. Throughput: 0: 11015.1. Samples: 23459046. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:13,977][24592] Avg episode reward: [(0, '4.377')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:14,622][626795] Updated weights for policy 0, policy_version 133542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:16,427][626795] Updated weights for policy 0, policy_version 133552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:18,177][626795] Updated weights for policy 0, policy_version 133562 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:18,975][24592] Fps is (10 sec: 46696.6, 60 sec: 44583.1, 300 sec: 43903.7). Total num frames: 1094172672. Throughput: 0: 11064.7. Samples: 23528904. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:18,977][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:20,013][626795] Updated weights for policy 0, policy_version 133572 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:21,872][626795] Updated weights for policy 0, policy_version 133582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:23,517][626795] Updated weights for policy 0, policy_version 133592 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:23,975][24592] Fps is (10 sec: 45057.8, 60 sec: 44509.9, 300 sec: 43903.6). Total num frames: 1094393856. Throughput: 0: 11049.2. Samples: 23597166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:23,977][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:25,386][626795] Updated weights for policy 0, policy_version 133602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:28,807][626795] Updated weights for policy 0, policy_version 133612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:28,975][24592] Fps is (10 sec: 38502.4, 60 sec: 43417.6, 300 sec: 43681.5). Total num frames: 1094557696. Throughput: 0: 10947.4. Samples: 23627412. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:28,978][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:30,559][626795] Updated weights for policy 0, policy_version 133622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:32,352][626795] Updated weights for policy 0, policy_version 133632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:33,975][24592] Fps is (10 sec: 38501.9, 60 sec: 43281.0, 300 sec: 43626.0). Total num frames: 1094778880. Throughput: 0: 10657.8. Samples: 23682186. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:33,976][24592] Avg episode reward: [(0, '4.389')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:34,243][626795] Updated weights for policy 0, policy_version 133642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:35,913][626795] Updated weights for policy 0, policy_version 133652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:37,755][626795] Updated weights for policy 0, policy_version 133662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:38,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43281.0, 300 sec: 43598.1). Total num frames: 1095008256. Throughput: 0: 11415.5. Samples: 23750616. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:38,976][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:39,589][626795] Updated weights for policy 0, policy_version 133672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:41,307][626795] Updated weights for policy 0, policy_version 133682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:43,078][626795] Updated weights for policy 0, policy_version 133692 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:43,976][24592] Fps is (10 sec: 46693.2, 60 sec: 43554.6, 300 sec: 43625.9). Total num frames: 1095245824. Throughput: 0: 11039.1. Samples: 23784666. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:43,977][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:44,854][626795] Updated weights for policy 0, policy_version 133702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:46,534][626795] Updated weights for policy 0, policy_version 133712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:48,376][626795] Updated weights for policy 0, policy_version 133722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:48,975][24592] Fps is (10 sec: 46694.5, 60 sec: 43418.2, 300 sec: 43653.6). Total num frames: 1095475200. Throughput: 0: 11088.8. Samples: 23855136. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:48,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:50,166][626795] Updated weights for policy 0, policy_version 133732 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:51,836][626795] Updated weights for policy 0, policy_version 133742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:53,650][626795] Updated weights for policy 0, policy_version 133752 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:53,975][24592] Fps is (10 sec: 45877.0, 60 sec: 44659.5, 300 sec: 43903.6). Total num frames: 1095704576. Throughput: 0: 11119.9. Samples: 23924514. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:53,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:55,561][626795] Updated weights for policy 0, policy_version 133762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:57,316][626795] Updated weights for policy 0, policy_version 133772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:58,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44784.6, 300 sec: 43903.6). Total num frames: 1095933952. Throughput: 0: 11099.2. Samples: 23958504. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:57:58,977][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:57:59,097][626795] Updated weights for policy 0, policy_version 133782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:02,453][626795] Updated weights for policy 0, policy_version 133792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:03,975][24592] Fps is (10 sec: 38502.1, 60 sec: 43554.1, 300 sec: 43625.9). Total num frames: 1096089600. Throughput: 0: 10657.0. Samples: 24008472. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:03,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:04,316][626795] Updated weights for policy 0, policy_version 133802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:06,043][626795] Updated weights for policy 0, policy_version 133812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:07,764][626795] Updated weights for policy 0, policy_version 133822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:08,975][24592] Fps is (10 sec: 38502.4, 60 sec: 43554.5, 300 sec: 43598.1). Total num frames: 1096318976. Throughput: 0: 10666.9. Samples: 24077178. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:08,977][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:09,640][626795] Updated weights for policy 0, policy_version 133832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:11,505][626795] Updated weights for policy 0, policy_version 133842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:13,279][626795] Updated weights for policy 0, policy_version 133852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:13,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43417.9, 300 sec: 43570.3). Total num frames: 1096548352. Throughput: 0: 10745.2. Samples: 24110946. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:13,977][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:15,035][626795] Updated weights for policy 0, policy_version 133862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:16,747][626795] Updated weights for policy 0, policy_version 133872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:18,650][626795] Updated weights for policy 0, policy_version 133882 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:18,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43417.6, 300 sec: 43570.3). Total num frames: 1096777728. Throughput: 0: 11059.1. Samples: 24179844. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:18,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:20,269][626795] Updated weights for policy 0, policy_version 133892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:22,010][626795] Updated weights for policy 0, policy_version 133902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:23,829][626795] Updated weights for policy 0, policy_version 133912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:23,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43554.1, 300 sec: 43853.3). Total num frames: 1097007104. Throughput: 0: 11102.1. Samples: 24250212. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:23,976][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:25,642][626795] Updated weights for policy 0, policy_version 133922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:27,432][626795] Updated weights for policy 0, policy_version 133932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:28,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44646.4, 300 sec: 43875.8). Total num frames: 1097236480. Throughput: 0: 11109.8. Samples: 24284604. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:28,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:29,232][626795] Updated weights for policy 0, policy_version 133942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:31,048][626795] Updated weights for policy 0, policy_version 133952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:32,777][626795] Updated weights for policy 0, policy_version 133962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:35,550][24592] Fps is (10 sec: 39633.4, 60 sec: 43637.6, 300 sec: 43642.8). Total num frames: 1097465856. Throughput: 0: 10689.1. Samples: 24352980. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:35,551][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:36,155][626795] Updated weights for policy 0, policy_version 133972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:38,020][626795] Updated weights for policy 0, policy_version 133982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:38,976][24592] Fps is (10 sec: 38501.6, 60 sec: 43554.1, 300 sec: 43598.1). Total num frames: 1097621504. Throughput: 0: 10650.2. Samples: 24403776. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:38,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:39,695][626795] Updated weights for policy 0, policy_version 133992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:41,559][626795] Updated weights for policy 0, policy_version 134002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:43,307][626795] Updated weights for policy 0, policy_version 134012 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:43,976][24592] Fps is (10 sec: 45696.9, 60 sec: 43417.5, 300 sec: 43598.0). Total num frames: 1097850880. Throughput: 0: 10659.1. Samples: 24438168. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:43,977][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:45,186][626795] Updated weights for policy 0, policy_version 134022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:46,881][626795] Updated weights for policy 0, policy_version 134032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:48,717][626795] Updated weights for policy 0, policy_version 134042 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:48,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43417.6, 300 sec: 43570.3). Total num frames: 1098080256. Throughput: 0: 11066.8. Samples: 24506478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:48,977][24592] Avg episode reward: [(0, '4.157')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:50,466][626795] Updated weights for policy 0, policy_version 134052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:52,250][626795] Updated weights for policy 0, policy_version 134062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:53,975][24592] Fps is (10 sec: 45877.5, 60 sec: 43417.6, 300 sec: 43542.6). Total num frames: 1098309632. Throughput: 0: 11096.4. Samples: 24576516. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:53,981][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:53,988][626795] Updated weights for policy 0, policy_version 134072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:55,710][626795] Updated weights for policy 0, policy_version 134082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:57,486][626795] Updated weights for policy 0, policy_version 134092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:58,975][24592] Fps is (10 sec: 46695.0, 60 sec: 43554.2, 300 sec: 43899.5). Total num frames: 1098547200. Throughput: 0: 11128.7. Samples: 24611736. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:58:58,976][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:58:59,237][626795] Updated weights for policy 0, policy_version 134102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:01,086][626795] Updated weights for policy 0, policy_version 134112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:02,896][626795] Updated weights for policy 0, policy_version 134122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:03,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44783.0, 300 sec: 43931.3). Total num frames: 1098776576. Throughput: 0: 11121.9. Samples: 24680328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:03,977][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000134128_1098776576.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000132850_1088307200.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:04,625][626795] Updated weights for policy 0, policy_version 134132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:06,409][626795] Updated weights for policy 0, policy_version 134142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:09,425][24592] Fps is (10 sec: 38413.7, 60 sec: 43501.2, 300 sec: 43642.7). Total num frames: 1098948608. Throughput: 0: 10211.4. Samples: 24714318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:09,426][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:09,994][626795] Updated weights for policy 0, policy_version 134152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:11,610][626795] Updated weights for policy 0, policy_version 134162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:13,412][626795] Updated weights for policy 0, policy_version 134172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:13,976][24592] Fps is (10 sec: 37681.3, 60 sec: 43417.3, 300 sec: 43598.0). Total num frames: 1099153408. Throughput: 0: 10666.8. Samples: 24764616. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:13,978][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:15,304][626795] Updated weights for policy 0, policy_version 134182 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:17,058][626795] Updated weights for policy 0, policy_version 134192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:18,740][626795] Updated weights for policy 0, policy_version 134202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:18,975][24592] Fps is (10 sec: 45461.4, 60 sec: 43417.6, 300 sec: 43570.4). Total num frames: 1099382784. Throughput: 0: 11047.3. Samples: 24832710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:18,976][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:20,641][626795] Updated weights for policy 0, policy_version 134212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:22,416][626795] Updated weights for policy 0, policy_version 134222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:23,976][24592] Fps is (10 sec: 45875.3, 60 sec: 43417.3, 300 sec: 43542.5). Total num frames: 1099612160. Throughput: 0: 11075.1. Samples: 24902160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:23,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:24,144][626795] Updated weights for policy 0, policy_version 134232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:25,794][626795] Updated weights for policy 0, policy_version 134242 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:27,719][626795] Updated weights for policy 0, policy_version 134252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:28,975][24592] Fps is (10 sec: 46694.6, 60 sec: 43554.1, 300 sec: 43570.3). Total num frames: 1099849728. Throughput: 0: 11086.1. Samples: 24937038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:28,976][24592] Avg episode reward: [(0, '4.334')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:29,364][626795] Updated weights for policy 0, policy_version 134262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:31,224][626795] Updated weights for policy 0, policy_version 134272 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:33,017][626795] Updated weights for policy 0, policy_version 134282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:33,975][24592] Fps is (10 sec: 46696.1, 60 sec: 44728.1, 300 sec: 43903.6). Total num frames: 1100079104. Throughput: 0: 11112.4. Samples: 25006536. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:33,976][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:34,780][626795] Updated weights for policy 0, policy_version 134292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:36,607][626795] Updated weights for policy 0, policy_version 134302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:38,374][626795] Updated weights for policy 0, policy_version 134312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:38,976][24592] Fps is (10 sec: 45873.2, 60 sec: 44782.7, 300 sec: 43931.3). Total num frames: 1100308480. Throughput: 0: 11075.1. Samples: 25074900. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:38,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:40,159][626795] Updated weights for policy 0, policy_version 134322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:43,614][626795] Updated weights for policy 0, policy_version 134332 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:43,975][24592] Fps is (10 sec: 38502.6, 60 sec: 43554.5, 300 sec: 43653.7). Total num frames: 1100464128. Throughput: 0: 11047.2. Samples: 25108860. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:43,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:45,229][626795] Updated weights for policy 0, policy_version 134342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:47,158][626795] Updated weights for policy 0, policy_version 134352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:48,939][626795] Updated weights for policy 0, policy_version 134362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:48,975][24592] Fps is (10 sec: 38503.9, 60 sec: 43554.2, 300 sec: 43626.0). Total num frames: 1100693504. Throughput: 0: 10649.7. Samples: 25159566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:48,977][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:50,676][626795] Updated weights for policy 0, policy_version 134372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:52,541][626795] Updated weights for policy 0, policy_version 134382 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:53,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43554.1, 300 sec: 43598.1). Total num frames: 1100922880. Throughput: 0: 11534.0. Samples: 25228164. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:53,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:54,406][626795] Updated weights for policy 0, policy_version 134392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:56,008][626795] Updated weights for policy 0, policy_version 134402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:57,881][626795] Updated weights for policy 0, policy_version 134412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:58,976][24592] Fps is (10 sec: 45873.3, 60 sec: 43417.3, 300 sec: 43570.3). Total num frames: 1101152256. Throughput: 0: 11055.5. Samples: 25262112. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:59:58,977][24592] Avg episode reward: [(0, '4.247')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:59:59,544][626795] Updated weights for policy 0, policy_version 134422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:01,353][626795] Updated weights for policy 0, policy_version 134432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:03,221][626795] Updated weights for policy 0, policy_version 134442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:03,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43417.6, 300 sec: 43570.3). Total num frames: 1101381632. Throughput: 0: 11105.2. Samples: 25332444. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:03,977][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:04,850][626795] Updated weights for policy 0, policy_version 134452 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:06,611][626795] Updated weights for policy 0, policy_version 134462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:08,550][626795] Updated weights for policy 0, policy_version 134472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:08,976][24592] Fps is (10 sec: 45872.9, 60 sec: 44707.6, 300 sec: 43903.4). Total num frames: 1101611008. Throughput: 0: 11085.5. Samples: 25401012. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:08,978][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:10,269][626795] Updated weights for policy 0, policy_version 134482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:11,986][626795] Updated weights for policy 0, policy_version 134492 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:13,818][626795] Updated weights for policy 0, policy_version 134502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44783.3, 300 sec: 43903.6). Total num frames: 1101840384. Throughput: 0: 11079.1. Samples: 25435596. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:13,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:17,289][626795] Updated weights for policy 0, policy_version 134512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:18,971][626795] Updated weights for policy 0, policy_version 134522 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:18,975][24592] Fps is (10 sec: 39325.2, 60 sec: 43690.7, 300 sec: 43681.4). Total num frames: 1102004224. Throughput: 0: 10657.0. Samples: 25486098. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:18,976][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:20,808][626795] Updated weights for policy 0, policy_version 134532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:22,585][626795] Updated weights for policy 0, policy_version 134542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:23,975][24592] Fps is (10 sec: 39321.6, 60 sec: 43691.0, 300 sec: 43681.4). Total num frames: 1102233600. Throughput: 0: 10670.4. Samples: 25555062. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:23,977][24592] Avg episode reward: [(0, '4.344')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:24,424][626795] Updated weights for policy 0, policy_version 134552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:26,110][626795] Updated weights for policy 0, policy_version 134562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:27,925][626795] Updated weights for policy 0, policy_version 134572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:28,976][24592] Fps is (10 sec: 45053.3, 60 sec: 43417.1, 300 sec: 43653.6). Total num frames: 1102454784. Throughput: 0: 10673.2. Samples: 25589160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:28,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:29,736][626795] Updated weights for policy 0, policy_version 134582 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:31,545][626795] Updated weights for policy 0, policy_version 134592 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:33,257][626795] Updated weights for policy 0, policy_version 134602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:33,975][24592] Fps is (10 sec: 45875.0, 60 sec: 43554.2, 300 sec: 43681.4). Total num frames: 1102692352. Throughput: 0: 11087.9. Samples: 25658520. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:33,976][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:35,012][626795] Updated weights for policy 0, policy_version 134612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:36,790][626795] Updated weights for policy 0, policy_version 134622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:38,483][626795] Updated weights for policy 0, policy_version 134632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:38,976][24592] Fps is (10 sec: 46696.1, 60 sec: 43554.2, 300 sec: 43681.4). Total num frames: 1102921728. Throughput: 0: 11125.1. Samples: 25728798. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:38,977][24592] Avg episode reward: [(0, '4.212')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:40,296][626795] Updated weights for policy 0, policy_version 134642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:42,182][626795] Updated weights for policy 0, policy_version 134652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:43,820][626795] Updated weights for policy 0, policy_version 134662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44782.9, 300 sec: 43903.6). Total num frames: 1103151104. Throughput: 0: 11131.8. Samples: 25763040. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:43,976][24592] Avg episode reward: [(0, '4.315')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:45,652][626795] Updated weights for policy 0, policy_version 134672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:47,606][626795] Updated weights for policy 0, policy_version 134682 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:48,976][24592] Fps is (10 sec: 44233.5, 60 sec: 44509.1, 300 sec: 43875.7). Total num frames: 1103364096. Throughput: 0: 11061.5. Samples: 25830222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:48,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:51,206][626795] Updated weights for policy 0, policy_version 134692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:53,250][626795] Updated weights for policy 0, policy_version 134702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:53,975][24592] Fps is (10 sec: 36044.9, 60 sec: 43144.5, 300 sec: 43598.2). Total num frames: 1103511552. Throughput: 0: 10517.8. Samples: 25874304. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:53,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:55,256][626795] Updated weights for policy 0, policy_version 134712 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:56,961][626795] Updated weights for policy 0, policy_version 134722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:00:58,835][626795] Updated weights for policy 0, policy_version 134732 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:58,975][24592] Fps is (10 sec: 36048.5, 60 sec: 42871.8, 300 sec: 43570.4). Total num frames: 1103724544. Throughput: 0: 10477.7. Samples: 25907094. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:00:58,977][24592] Avg episode reward: [(0, '4.360')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:00,802][626795] Updated weights for policy 0, policy_version 134742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:00,829][626772] Signal inference workers to stop experience collection... (400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:00,831][626772] Signal inference workers to resume experience collection... (400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:00,840][626795] InferenceWorker_p0-w0: stopping experience collection (400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:00,846][626795] InferenceWorker_p0-w0: resuming experience collection (400 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:02,603][626795] Updated weights for policy 0, policy_version 134752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:03,976][24592] Fps is (10 sec: 43416.5, 60 sec: 42734.7, 300 sec: 43514.8). Total num frames: 1103945728. Throughput: 0: 10819.9. Samples: 25972998. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:03,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000134759_1103945728.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:04,038][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000133481_1093476352.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:04,418][626795] Updated weights for policy 0, policy_version 134762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:06,210][626795] Updated weights for policy 0, policy_version 134772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:07,977][626795] Updated weights for policy 0, policy_version 134782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:08,975][24592] Fps is (10 sec: 45055.6, 60 sec: 42735.5, 300 sec: 43542.6). Total num frames: 1104175104. Throughput: 0: 10819.6. Samples: 26041944. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:08,977][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:09,709][626795] Updated weights for policy 0, policy_version 134792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:11,613][626795] Updated weights for policy 0, policy_version 134802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:13,803][626795] Updated weights for policy 0, policy_version 134812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:13,976][24592] Fps is (10 sec: 43414.3, 60 sec: 42324.6, 300 sec: 43664.7). Total num frames: 1104379904. Throughput: 0: 10833.8. Samples: 26076684. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:13,978][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:15,861][626795] Updated weights for policy 0, policy_version 134822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:17,837][626795] Updated weights for policy 0, policy_version 134832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:18,975][24592] Fps is (10 sec: 40960.5, 60 sec: 43008.0, 300 sec: 43598.1). Total num frames: 1104584704. Throughput: 0: 10582.2. Samples: 26134716. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:18,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:19,785][626795] Updated weights for policy 0, policy_version 134842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:21,674][626795] Updated weights for policy 0, policy_version 134852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:24,993][24592] Fps is (10 sec: 36435.7, 60 sec: 41753.5, 300 sec: 43337.5). Total num frames: 1104781312. Throughput: 0: 9522.3. Samples: 26166990. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:24,995][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:25,135][626795] Updated weights for policy 0, policy_version 134862 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:27,027][626795] Updated weights for policy 0, policy_version 134872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:28,876][626795] Updated weights for policy 0, policy_version 134882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:28,975][24592] Fps is (10 sec: 36863.8, 60 sec: 41643.1, 300 sec: 43292.6). Total num frames: 1104953344. Throughput: 0: 10026.1. Samples: 26214216. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:28,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:30,777][626795] Updated weights for policy 0, policy_version 134892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:32,675][626795] Updated weights for policy 0, policy_version 134902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:33,975][24592] Fps is (10 sec: 42865.8, 60 sec: 41233.1, 300 sec: 43237.1). Total num frames: 1105166336. Throughput: 0: 9973.0. Samples: 26278998. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:33,976][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:34,600][626795] Updated weights for policy 0, policy_version 134912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:36,536][626795] Updated weights for policy 0, policy_version 134922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:38,308][626795] Updated weights for policy 0, policy_version 134932 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:38,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41096.8, 300 sec: 43237.3). Total num frames: 1105387520. Throughput: 0: 10453.2. Samples: 26344698. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:38,976][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:40,150][626795] Updated weights for policy 0, policy_version 134942 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:42,101][626795] Updated weights for policy 0, policy_version 134952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:43,796][626795] Updated weights for policy 0, policy_version 134962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:43,976][24592] Fps is (10 sec: 44235.9, 60 sec: 40959.9, 300 sec: 43181.7). Total num frames: 1105608704. Throughput: 0: 10472.7. Samples: 26378370. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:43,978][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:45,677][626795] Updated weights for policy 0, policy_version 134972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:47,531][626795] Updated weights for policy 0, policy_version 134982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:48,976][24592] Fps is (10 sec: 44234.6, 60 sec: 41096.9, 300 sec: 43406.2). Total num frames: 1105829888. Throughput: 0: 10482.1. Samples: 26444694. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:48,978][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:49,491][626795] Updated weights for policy 0, policy_version 134992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:51,407][626795] Updated weights for policy 0, policy_version 135002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:53,265][626795] Updated weights for policy 0, policy_version 135012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:53,975][24592] Fps is (10 sec: 43418.4, 60 sec: 42188.8, 300 sec: 43376.3). Total num frames: 1106042880. Throughput: 0: 10396.8. Samples: 26509800. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:53,976][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:55,116][626795] Updated weights for policy 0, policy_version 135022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:01:56,979][626795] Updated weights for policy 0, policy_version 135032 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:58,975][24592] Fps is (10 sec: 36046.3, 60 sec: 41096.5, 300 sec: 43098.2). Total num frames: 1106190336. Throughput: 0: 10344.6. Samples: 26542182. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:01:58,976][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:00,577][626795] Updated weights for policy 0, policy_version 135042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:02,523][626795] Updated weights for policy 0, policy_version 135052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:03,975][24592] Fps is (10 sec: 36044.8, 60 sec: 40960.2, 300 sec: 43042.8). Total num frames: 1106403328. Throughput: 0: 10097.1. Samples: 26589084. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:03,979][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:04,424][626795] Updated weights for policy 0, policy_version 135062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:06,231][626795] Updated weights for policy 0, policy_version 135072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:08,180][626795] Updated weights for policy 0, policy_version 135082 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:08,976][24592] Fps is (10 sec: 42597.8, 60 sec: 40686.9, 300 sec: 42959.4). Total num frames: 1106616320. Throughput: 0: 11060.6. Samples: 26653458. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:08,977][24592] Avg episode reward: [(0, '4.309')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:10,139][626795] Updated weights for policy 0, policy_version 135092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:11,854][626795] Updated weights for policy 0, policy_version 135102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:13,739][626795] Updated weights for policy 0, policy_version 135112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:13,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41097.2, 300 sec: 42959.4). Total num frames: 1106845696. Throughput: 0: 10496.9. Samples: 26686578. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:13,976][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:15,638][626795] Updated weights for policy 0, policy_version 135122 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:17,470][626795] Updated weights for policy 0, policy_version 135132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:18,975][24592] Fps is (10 sec: 45056.7, 60 sec: 41369.6, 300 sec: 42959.4). Total num frames: 1107066880. Throughput: 0: 10546.3. Samples: 26753580. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:18,976][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:19,304][626795] Updated weights for policy 0, policy_version 135142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:21,189][626795] Updated weights for policy 0, policy_version 135152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:23,113][626795] Updated weights for policy 0, policy_version 135162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:23,976][24592] Fps is (10 sec: 43416.8, 60 sec: 42361.2, 300 sec: 43126.0). Total num frames: 1107279872. Throughput: 0: 10517.0. Samples: 26817966. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:23,976][24592] Avg episode reward: [(0, '4.229')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:25,065][626795] Updated weights for policy 0, policy_version 135172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:26,983][626795] Updated weights for policy 0, policy_version 135182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:28,835][626795] Updated weights for policy 0, policy_version 135192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42325.3, 300 sec: 43098.3). Total num frames: 1107492864. Throughput: 0: 10481.2. Samples: 26850024. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:28,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:30,798][626795] Updated weights for policy 0, policy_version 135202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:33,975][24592] Fps is (10 sec: 36045.4, 60 sec: 41233.0, 300 sec: 42820.6). Total num frames: 1107640320. Throughput: 0: 10110.0. Samples: 26899638. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:33,977][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:34,211][626795] Updated weights for policy 0, policy_version 135212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:36,181][626795] Updated weights for policy 0, policy_version 135222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:38,090][626795] Updated weights for policy 0, policy_version 135232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:38,976][24592] Fps is (10 sec: 36043.0, 60 sec: 41096.1, 300 sec: 42737.2). Total num frames: 1107853312. Throughput: 0: 10052.8. Samples: 26962182. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:38,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:39,919][626795] Updated weights for policy 0, policy_version 135242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:41,815][626795] Updated weights for policy 0, policy_version 135252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:43,690][626795] Updated weights for policy 0, policy_version 135262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:43,976][24592] Fps is (10 sec: 42596.4, 60 sec: 40959.8, 300 sec: 42681.6). Total num frames: 1108066304. Throughput: 0: 10060.6. Samples: 26994912. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:43,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:45,588][626795] Updated weights for policy 0, policy_version 135272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:47,366][626795] Updated weights for policy 0, policy_version 135282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:48,975][24592] Fps is (10 sec: 44239.2, 60 sec: 41096.9, 300 sec: 42681.7). Total num frames: 1108295680. Throughput: 0: 10500.7. Samples: 27061614. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:48,976][24592] Avg episode reward: [(0, '4.267')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:49,266][626795] Updated weights for policy 0, policy_version 135292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:50,987][626795] Updated weights for policy 0, policy_version 135302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:52,974][626795] Updated weights for policy 0, policy_version 135312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:53,975][24592] Fps is (10 sec: 45057.9, 60 sec: 41233.0, 300 sec: 42653.9). Total num frames: 1108516864. Throughput: 0: 10514.8. Samples: 27126624. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:53,977][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:54,980][626795] Updated weights for policy 0, policy_version 135322 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:56,934][626795] Updated weights for policy 0, policy_version 135332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:02:58,737][626795] Updated weights for policy 0, policy_version 135342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:58,978][24592] Fps is (10 sec: 42585.4, 60 sec: 42186.7, 300 sec: 42820.1). Total num frames: 1108721664. Throughput: 0: 10476.9. Samples: 27158070. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:02:58,979][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:00,754][626795] Updated weights for policy 0, policy_version 135352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:02,584][626795] Updated weights for policy 0, policy_version 135362 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:03,976][24592] Fps is (10 sec: 42597.1, 60 sec: 42325.1, 300 sec: 42792.7). Total num frames: 1108942848. Throughput: 0: 10426.4. Samples: 27222774. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:03,978][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000135369_1108942848.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000134128_1098776576.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:04,476][626795] Updated weights for policy 0, policy_version 135372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:08,001][626795] Updated weights for policy 0, policy_version 135382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:08,975][24592] Fps is (10 sec: 36875.1, 60 sec: 41233.2, 300 sec: 42515.1). Total num frames: 1109090304. Throughput: 0: 10052.6. Samples: 27270330. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:08,976][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:09,928][626795] Updated weights for policy 0, policy_version 135392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:11,783][626795] Updated weights for policy 0, policy_version 135402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:13,637][626795] Updated weights for policy 0, policy_version 135412 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:13,975][24592] Fps is (10 sec: 36046.2, 60 sec: 40960.0, 300 sec: 42459.6). Total num frames: 1109303296. Throughput: 0: 10061.6. Samples: 27302796. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:13,977][24592] Avg episode reward: [(0, '4.237')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:15,487][626795] Updated weights for policy 0, policy_version 135422 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:17,364][626795] Updated weights for policy 0, policy_version 135432 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:18,975][24592] Fps is (10 sec: 43417.8, 60 sec: 40960.0, 300 sec: 42431.8). Total num frames: 1109524480. Throughput: 0: 10414.8. Samples: 27368304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:18,977][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:19,216][626795] Updated weights for policy 0, policy_version 135442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:21,087][626795] Updated weights for policy 0, policy_version 135452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:22,928][626795] Updated weights for policy 0, policy_version 135462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:23,976][24592] Fps is (10 sec: 45051.9, 60 sec: 41232.6, 300 sec: 42431.6). Total num frames: 1109753856. Throughput: 0: 10520.8. Samples: 27435624. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:23,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:24,701][626795] Updated weights for policy 0, policy_version 135472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:26,629][626795] Updated weights for policy 0, policy_version 135482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:28,570][626795] Updated weights for policy 0, policy_version 135492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:28,976][24592] Fps is (10 sec: 44235.7, 60 sec: 41232.9, 300 sec: 42603.7). Total num frames: 1109966848. Throughput: 0: 10513.1. Samples: 27468000. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:28,979][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:30,458][626795] Updated weights for policy 0, policy_version 135502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:32,376][626795] Updated weights for policy 0, policy_version 135512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:33,975][24592] Fps is (10 sec: 42602.1, 60 sec: 42325.3, 300 sec: 42570.7). Total num frames: 1110179840. Throughput: 0: 10451.7. Samples: 27531942. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:33,977][24592] Avg episode reward: [(0, '4.230')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:34,398][626795] Updated weights for policy 0, policy_version 135522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:36,233][626795] Updated weights for policy 0, policy_version 135532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:38,260][626795] Updated weights for policy 0, policy_version 135542 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:40,648][24592] Fps is (10 sec: 35793.9, 60 sec: 41045.1, 300 sec: 42247.9). Total num frames: 1110384640. Throughput: 0: 10046.4. Samples: 27595512. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:40,660][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:41,858][626795] Updated weights for policy 0, policy_version 135552 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:43,823][626795] Updated weights for policy 0, policy_version 135562 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:43,976][24592] Fps is (10 sec: 34405.6, 60 sec: 40960.2, 300 sec: 42181.8). Total num frames: 1110523904. Throughput: 0: 10025.4. Samples: 27609186. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:43,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:45,608][626795] Updated weights for policy 0, policy_version 135572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:47,436][626795] Updated weights for policy 0, policy_version 135582 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:48,976][24592] Fps is (10 sec: 44265.2, 60 sec: 40959.7, 300 sec: 42181.8). Total num frames: 1110753280. Throughput: 0: 10049.3. Samples: 27674994. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:48,978][24592] Avg episode reward: [(0, '4.375')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:49,230][626795] Updated weights for policy 0, policy_version 135592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:51,024][626795] Updated weights for policy 0, policy_version 135602 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:52,978][626795] Updated weights for policy 0, policy_version 135612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:53,975][24592] Fps is (10 sec: 45057.1, 60 sec: 40960.1, 300 sec: 42126.3). Total num frames: 1110974464. Throughput: 0: 10490.4. Samples: 27742398. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:53,976][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:54,735][626795] Updated weights for policy 0, policy_version 135622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:56,528][626795] Updated weights for policy 0, policy_version 135632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:03:58,420][626795] Updated weights for policy 0, policy_version 135642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:58,975][24592] Fps is (10 sec: 44238.4, 60 sec: 41235.2, 300 sec: 42098.6). Total num frames: 1111195648. Throughput: 0: 10529.1. Samples: 27776604. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:03:58,976][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:00,278][626795] Updated weights for policy 0, policy_version 135652 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:02,130][626795] Updated weights for policy 0, policy_version 135662 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:03,976][24592] Fps is (10 sec: 44235.8, 60 sec: 41233.2, 300 sec: 42329.6). Total num frames: 1111416832. Throughput: 0: 10533.1. Samples: 27842298. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:03,977][24592] Avg episode reward: [(0, '4.146')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:04,015][626795] Updated weights for policy 0, policy_version 135672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:06,022][626795] Updated weights for policy 0, policy_version 135682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:07,826][626795] Updated weights for policy 0, policy_version 135692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:08,976][24592] Fps is (10 sec: 44235.6, 60 sec: 42461.7, 300 sec: 42320.7). Total num frames: 1111638016. Throughput: 0: 10465.8. Samples: 27906576. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:08,977][24592] Avg episode reward: [(0, '4.238')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:09,687][626795] Updated weights for policy 0, policy_version 135702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:11,481][626795] Updated weights for policy 0, policy_version 135712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:15,102][24592] Fps is (10 sec: 35341.8, 60 sec: 41009.4, 300 sec: 41966.1). Total num frames: 1111810048. Throughput: 0: 10256.3. Samples: 27941082. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:15,105][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:15,511][626795] Updated weights for policy 0, policy_version 135722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:17,463][626795] Updated weights for policy 0, policy_version 135732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:18,976][24592] Fps is (10 sec: 34405.9, 60 sec: 40959.7, 300 sec: 41931.9). Total num frames: 1111982080. Throughput: 0: 10010.6. Samples: 27982422. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:18,978][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:19,262][626795] Updated weights for policy 0, policy_version 135742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:21,089][626795] Updated weights for policy 0, policy_version 135752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:22,997][626795] Updated weights for policy 0, policy_version 135762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:23,977][24592] Fps is (10 sec: 43383.0, 60 sec: 40686.7, 300 sec: 41848.4). Total num frames: 1112195072. Throughput: 0: 10438.3. Samples: 28047792. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:23,978][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:24,907][626795] Updated weights for policy 0, policy_version 135772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:26,749][626795] Updated weights for policy 0, policy_version 135782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:28,575][626795] Updated weights for policy 0, policy_version 135792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:28,977][24592] Fps is (10 sec: 43412.7, 60 sec: 40822.5, 300 sec: 41820.6). Total num frames: 1112416256. Throughput: 0: 10485.3. Samples: 28081038. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:28,978][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:30,552][626795] Updated weights for policy 0, policy_version 135802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:32,326][626795] Updated weights for policy 0, policy_version 135812 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:33,975][24592] Fps is (10 sec: 44242.3, 60 sec: 40960.0, 300 sec: 41793.1). Total num frames: 1112637440. Throughput: 0: 10474.6. Samples: 28146348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:33,976][24592] Avg episode reward: [(0, '4.197')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:34,309][626795] Updated weights for policy 0, policy_version 135822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:36,106][626795] Updated weights for policy 0, policy_version 135832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:38,043][626795] Updated weights for policy 0, policy_version 135842 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:38,976][24592] Fps is (10 sec: 43420.7, 60 sec: 42274.2, 300 sec: 41987.4). Total num frames: 1112850432. Throughput: 0: 10435.3. Samples: 28211994. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:38,980][24592] Avg episode reward: [(0, '4.183')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:39,838][626795] Updated weights for policy 0, policy_version 135852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:41,771][626795] Updated weights for policy 0, policy_version 135862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:43,675][626795] Updated weights for policy 0, policy_version 135872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:43,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42462.0, 300 sec: 41959.7). Total num frames: 1113071616. Throughput: 0: 10406.5. Samples: 28244898. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:43,977][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:45,470][626795] Updated weights for policy 0, policy_version 135882 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:49,139][24592] Fps is (10 sec: 36272.8, 60 sec: 40984.9, 300 sec: 41658.9). Total num frames: 1113219072. Throughput: 0: 9637.1. Samples: 28277544. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:49,140][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:49,268][626795] Updated weights for policy 0, policy_version 135892 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:51,079][626795] Updated weights for policy 0, policy_version 135902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:52,934][626795] Updated weights for policy 0, policy_version 135912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:53,975][24592] Fps is (10 sec: 36044.9, 60 sec: 40960.0, 300 sec: 41626.5). Total num frames: 1113432064. Throughput: 0: 9994.6. Samples: 28356330. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:53,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:54,831][626795] Updated weights for policy 0, policy_version 135922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:56,799][626795] Updated weights for policy 0, policy_version 135932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:04:58,740][626795] Updated weights for policy 0, policy_version 135942 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:58,981][24592] Fps is (10 sec: 43282.4, 60 sec: 40819.5, 300 sec: 41570.1). Total num frames: 1113645056. Throughput: 0: 10181.3. Samples: 28387830. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:04:58,982][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:00,617][626795] Updated weights for policy 0, policy_version 135952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:02,400][626795] Updated weights for policy 0, policy_version 135962 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:03,975][24592] Fps is (10 sec: 43417.5, 60 sec: 40823.6, 300 sec: 41543.3). Total num frames: 1113866240. Throughput: 0: 10449.9. Samples: 28452666. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:03,978][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000135970_1113866240.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000134759_1103945728.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:04,317][626795] Updated weights for policy 0, policy_version 135972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:06,282][626795] Updated weights for policy 0, policy_version 135982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:08,146][626795] Updated weights for policy 0, policy_version 135992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:08,975][24592] Fps is (10 sec: 43442.7, 60 sec: 40687.1, 300 sec: 41487.6). Total num frames: 1114079232. Throughput: 0: 10444.0. Samples: 28517760. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:08,978][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:10,159][626795] Updated weights for policy 0, policy_version 136002 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:12,007][626795] Updated weights for policy 0, policy_version 136012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:13,864][626795] Updated weights for policy 0, policy_version 136022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:13,976][24592] Fps is (10 sec: 43416.1, 60 sec: 42299.9, 300 sec: 41681.9). Total num frames: 1114300416. Throughput: 0: 10416.7. Samples: 28549776. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:13,977][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:15,701][626795] Updated weights for policy 0, policy_version 136032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:17,577][626795] Updated weights for policy 0, policy_version 136042 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:18,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42189.0, 300 sec: 41626.5). Total num frames: 1114513408. Throughput: 0: 10424.0. Samples: 28615428. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:18,977][24592] Avg episode reward: [(0, '4.258')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:19,534][626795] Updated weights for policy 0, policy_version 136052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:23,208][626795] Updated weights for policy 0, policy_version 136062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:23,975][24592] Fps is (10 sec: 34407.7, 60 sec: 40824.3, 300 sec: 41321.1). Total num frames: 1114644480. Throughput: 0: 9963.6. Samples: 28660350. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:23,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:25,037][626795] Updated weights for policy 0, policy_version 136072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:26,891][626795] Updated weights for policy 0, policy_version 136082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:28,748][626795] Updated weights for policy 0, policy_version 136092 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:28,975][24592] Fps is (10 sec: 36045.1, 60 sec: 40961.1, 300 sec: 41293.2). Total num frames: 1114873856. Throughput: 0: 9973.7. Samples: 28693716. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:28,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:30,447][626795] Updated weights for policy 0, policy_version 136102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:32,270][626795] Updated weights for policy 0, policy_version 136112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:33,976][24592] Fps is (10 sec: 45872.8, 60 sec: 41096.2, 300 sec: 41293.2). Total num frames: 1115103232. Throughput: 0: 10826.1. Samples: 28762950. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:33,978][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:34,024][626795] Updated weights for policy 0, policy_version 136122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:35,800][626795] Updated weights for policy 0, policy_version 136132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:37,680][626795] Updated weights for policy 0, policy_version 136142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:38,976][24592] Fps is (10 sec: 45873.3, 60 sec: 41369.9, 300 sec: 41293.2). Total num frames: 1115332608. Throughput: 0: 10544.8. Samples: 28830852. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:38,976][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:39,583][626795] Updated weights for policy 0, policy_version 136152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:41,254][626795] Updated weights for policy 0, policy_version 136162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:43,091][626795] Updated weights for policy 0, policy_version 136172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:43,976][24592] Fps is (10 sec: 45055.4, 60 sec: 41369.1, 300 sec: 41321.0). Total num frames: 1115553792. Throughput: 0: 10592.4. Samples: 28864434. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:43,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:45,069][626795] Updated weights for policy 0, policy_version 136182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:46,988][626795] Updated weights for policy 0, policy_version 136192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:48,767][626795] Updated weights for policy 0, policy_version 136202 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:48,975][24592] Fps is (10 sec: 43419.1, 60 sec: 42578.1, 300 sec: 41543.2). Total num frames: 1115766784. Throughput: 0: 10604.5. Samples: 28929870. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:48,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:50,772][626795] Updated weights for policy 0, policy_version 136212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:52,553][626795] Updated weights for policy 0, policy_version 136222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:53,975][24592] Fps is (10 sec: 43420.5, 60 sec: 42598.4, 300 sec: 41570.9). Total num frames: 1115987968. Throughput: 0: 10595.9. Samples: 28994574. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:53,977][24592] Avg episode reward: [(0, '4.333')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:54,468][626795] Updated weights for policy 0, policy_version 136232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:05:58,839][626795] Updated weights for policy 0, policy_version 136242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:58,975][24592] Fps is (10 sec: 32768.3, 60 sec: 40827.4, 300 sec: 41182.2). Total num frames: 1116094464. Throughput: 0: 10318.1. Samples: 29014086. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:05:58,976][24592] Avg episode reward: [(0, '4.300')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:00,677][626795] Updated weights for policy 0, policy_version 136252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:02,664][626795] Updated weights for policy 0, policy_version 136262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:03,975][24592] Fps is (10 sec: 32767.7, 60 sec: 40823.4, 300 sec: 41154.4). Total num frames: 1116315648. Throughput: 0: 10008.8. Samples: 29065824. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:03,977][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:04,524][626795] Updated weights for policy 0, policy_version 136272 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:06,380][626795] Updated weights for policy 0, policy_version 136282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:08,154][626795] Updated weights for policy 0, policy_version 136292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:08,975][24592] Fps is (10 sec: 44236.8, 60 sec: 40960.0, 300 sec: 41210.1). Total num frames: 1116536832. Throughput: 0: 10460.8. Samples: 29131086. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:08,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:10,133][626795] Updated weights for policy 0, policy_version 136302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:12,011][626795] Updated weights for policy 0, policy_version 136312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:13,862][626795] Updated weights for policy 0, policy_version 136322 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:13,975][24592] Fps is (10 sec: 43418.1, 60 sec: 40823.7, 300 sec: 41237.7). Total num frames: 1116749824. Throughput: 0: 10447.7. Samples: 29163864. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:13,977][24592] Avg episode reward: [(0, '4.346')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:15,653][626795] Updated weights for policy 0, policy_version 136332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:17,522][626795] Updated weights for policy 0, policy_version 136342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:18,977][24592] Fps is (10 sec: 43410.0, 60 sec: 40958.9, 300 sec: 41463.8). Total num frames: 1116971008. Throughput: 0: 10393.6. Samples: 29230674. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:18,978][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:19,382][626795] Updated weights for policy 0, policy_version 136352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:21,178][626795] Updated weights for policy 0, policy_version 136362 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:23,095][626795] Updated weights for policy 0, policy_version 136372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:23,975][24592] Fps is (10 sec: 44237.0, 60 sec: 42461.9, 300 sec: 41487.6). Total num frames: 1117192192. Throughput: 0: 10362.5. Samples: 29297160. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:23,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:24,890][626795] Updated weights for policy 0, policy_version 136382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:26,789][626795] Updated weights for policy 0, policy_version 136392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:28,669][626795] Updated weights for policy 0, policy_version 136402 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:28,976][24592] Fps is (10 sec: 44241.0, 60 sec: 42324.8, 300 sec: 41515.3). Total num frames: 1117413376. Throughput: 0: 10354.0. Samples: 29330364. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:28,977][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:32,659][626795] Updated weights for policy 0, policy_version 136412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:33,976][24592] Fps is (10 sec: 35223.6, 60 sec: 40687.0, 300 sec: 41209.8). Total num frames: 1117544448. Throughput: 0: 9844.8. Samples: 29372892. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:33,977][24592] Avg episode reward: [(0, '4.339')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:34,508][626795] Updated weights for policy 0, policy_version 136422 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:36,406][626795] Updated weights for policy 0, policy_version 136432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:38,197][626795] Updated weights for policy 0, policy_version 136442 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:38,980][24592] Fps is (10 sec: 35211.9, 60 sec: 40547.5, 300 sec: 41209.3). Total num frames: 1117765632. Throughput: 0: 9865.4. Samples: 29438562. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:38,982][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:40,170][626795] Updated weights for policy 0, policy_version 136452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:41,986][626795] Updated weights for policy 0, policy_version 136462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:43,920][626795] Updated weights for policy 0, policy_version 136472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:43,975][24592] Fps is (10 sec: 43420.0, 60 sec: 40414.4, 300 sec: 41182.2). Total num frames: 1117978624. Throughput: 0: 10149.3. Samples: 29470806. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:43,977][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:45,542][626795] Updated weights for policy 0, policy_version 136482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:47,468][626795] Updated weights for policy 0, policy_version 136492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:48,975][24592] Fps is (10 sec: 44257.7, 60 sec: 40687.0, 300 sec: 41237.7). Total num frames: 1118208000. Throughput: 0: 10512.3. Samples: 29538876. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:48,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:49,298][626795] Updated weights for policy 0, policy_version 136502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:51,104][626795] Updated weights for policy 0, policy_version 136512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:52,965][626795] Updated weights for policy 0, policy_version 136522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:53,975][24592] Fps is (10 sec: 45056.3, 60 sec: 40687.0, 300 sec: 41487.6). Total num frames: 1118429184. Throughput: 0: 10545.2. Samples: 29605620. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:53,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:54,814][626795] Updated weights for policy 0, policy_version 136532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:56,624][626795] Updated weights for policy 0, policy_version 136542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:06:58,582][626795] Updated weights for policy 0, policy_version 136552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:58,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42461.8, 300 sec: 41487.6). Total num frames: 1118642176. Throughput: 0: 10543.5. Samples: 29638320. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:06:58,976][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:00,490][626795] Updated weights for policy 0, policy_version 136562 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:02,332][626795] Updated weights for policy 0, policy_version 136572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:03,975][24592] Fps is (10 sec: 43416.9, 60 sec: 42461.9, 300 sec: 41515.4). Total num frames: 1118863360. Throughput: 0: 10511.3. Samples: 29703666. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:03,979][24592] Avg episode reward: [(0, '4.288')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000136580_1118863360.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:04,052][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000135369_1108942848.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:04,256][626795] Updated weights for policy 0, policy_version 136582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:08,315][626795] Updated weights for policy 0, policy_version 136592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:08,975][24592] Fps is (10 sec: 34406.3, 60 sec: 40823.4, 300 sec: 41154.4). Total num frames: 1118986240. Throughput: 0: 9953.6. Samples: 29745072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:08,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:10,109][626795] Updated weights for policy 0, policy_version 136602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:12,029][626795] Updated weights for policy 0, policy_version 136612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:13,952][626795] Updated weights for policy 0, policy_version 136622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:13,975][24592] Fps is (10 sec: 34406.4, 60 sec: 40959.9, 300 sec: 41154.4). Total num frames: 1119207424. Throughput: 0: 9943.2. Samples: 29777802. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:13,978][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:15,813][626795] Updated weights for policy 0, policy_version 136632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:17,596][626795] Updated weights for policy 0, policy_version 136642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:18,975][24592] Fps is (10 sec: 44237.0, 60 sec: 40961.2, 300 sec: 41182.2). Total num frames: 1119428608. Throughput: 0: 10469.2. Samples: 29844000. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:18,978][24592] Avg episode reward: [(0, '4.325')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:19,459][626795] Updated weights for policy 0, policy_version 136652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:21,249][626795] Updated weights for policy 0, policy_version 136662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:22,998][626795] Updated weights for policy 0, policy_version 136672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:23,975][24592] Fps is (10 sec: 45056.9, 60 sec: 41096.6, 300 sec: 41237.7). Total num frames: 1119657984. Throughput: 0: 10526.3. Samples: 29912196. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:23,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:24,933][626795] Updated weights for policy 0, policy_version 136682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:26,800][626795] Updated weights for policy 0, policy_version 136692 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:28,511][626795] Updated weights for policy 0, policy_version 136702 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:28,975][24592] Fps is (10 sec: 44236.4, 60 sec: 40960.5, 300 sec: 41459.8). Total num frames: 1119870976. Throughput: 0: 10543.3. Samples: 29945256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:28,976][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:30,465][626795] Updated weights for policy 0, policy_version 136712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:32,422][626795] Updated weights for policy 0, policy_version 136722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,318][626772] Signal inference workers to stop experience collection... (450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,320][626772] Signal inference workers to resume experience collection... (450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,330][626795] InferenceWorker_p0-w0: stopping experience collection (450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,333][626795] InferenceWorker_p0-w0: resuming experience collection (450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,976][24592] Fps is (10 sec: 43416.5, 60 sec: 42462.1, 300 sec: 41487.7). Total num frames: 1120092160. Throughput: 0: 10492.2. Samples: 30011028. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:33,978][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:34,155][626795] Updated weights for policy 0, policy_version 136732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:36,171][626795] Updated weights for policy 0, policy_version 136742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:37,978][626795] Updated weights for policy 0, policy_version 136752 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:40,967][24592] Fps is (10 sec: 36205.3, 60 sec: 40968.3, 300 sec: 41209.4). Total num frames: 1120305152. Throughput: 0: 9993.1. Samples: 30075216. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:40,968][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:42,104][626795] Updated weights for policy 0, policy_version 136762 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:43,975][24592] Fps is (10 sec: 33587.8, 60 sec: 40823.5, 300 sec: 41126.6). Total num frames: 1120428032. Throughput: 0: 9904.3. Samples: 30084012. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:43,976][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:43,995][626795] Updated weights for policy 0, policy_version 136772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:45,859][626795] Updated weights for policy 0, policy_version 136782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:47,813][626795] Updated weights for policy 0, policy_version 136792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:48,976][24592] Fps is (10 sec: 42964.5, 60 sec: 40686.8, 300 sec: 41126.6). Total num frames: 1120649216. Throughput: 0: 9903.0. Samples: 30149304. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:48,978][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:49,692][626795] Updated weights for policy 0, policy_version 136802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:51,411][626795] Updated weights for policy 0, policy_version 136812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:53,240][626795] Updated weights for policy 0, policy_version 136822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:53,975][24592] Fps is (10 sec: 45056.0, 60 sec: 40823.4, 300 sec: 41210.3). Total num frames: 1120878592. Throughput: 0: 10476.4. Samples: 30216510. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:53,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:55,038][626795] Updated weights for policy 0, policy_version 136832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:56,770][626795] Updated weights for policy 0, policy_version 136842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:07:58,688][626795] Updated weights for policy 0, policy_version 136852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:58,975][24592] Fps is (10 sec: 45057.0, 60 sec: 40960.0, 300 sec: 41210.0). Total num frames: 1121099776. Throughput: 0: 10519.2. Samples: 30251166. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:07:58,977][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:00,426][626795] Updated weights for policy 0, policy_version 136862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:02,593][626795] Updated weights for policy 0, policy_version 136872 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:03,976][24592] Fps is (10 sec: 43415.8, 60 sec: 40823.3, 300 sec: 41432.0). Total num frames: 1121312768. Throughput: 0: 10474.0. Samples: 30315336. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:03,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:04,431][626795] Updated weights for policy 0, policy_version 136882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:06,214][626795] Updated weights for policy 0, policy_version 136892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:08,029][626795] Updated weights for policy 0, policy_version 136902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:08,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42461.9, 300 sec: 41459.9). Total num frames: 1121533952. Throughput: 0: 10443.6. Samples: 30382158. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:08,976][24592] Avg episode reward: [(0, '4.326')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:10,030][626795] Updated weights for policy 0, policy_version 136912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:11,881][626795] Updated weights for policy 0, policy_version 136922 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:15,514][24592] Fps is (10 sec: 34788.1, 60 sec: 40734.4, 300 sec: 41106.5). Total num frames: 1121714176. Throughput: 0: 10082.1. Samples: 30414468. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:15,516][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:16,079][626795] Updated weights for policy 0, policy_version 136932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:17,960][626795] Updated weights for policy 0, policy_version 136942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:18,976][24592] Fps is (10 sec: 33584.0, 60 sec: 40686.3, 300 sec: 41071.1). Total num frames: 1121869824. Throughput: 0: 9867.4. Samples: 30455070. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:18,977][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:19,786][626795] Updated weights for policy 0, policy_version 136952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:21,619][626795] Updated weights for policy 0, policy_version 136962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:23,542][626795] Updated weights for policy 0, policy_version 136972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:23,975][24592] Fps is (10 sec: 44537.6, 60 sec: 40550.3, 300 sec: 41098.9). Total num frames: 1122091008. Throughput: 0: 10359.5. Samples: 30520758. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:23,976][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:25,374][626795] Updated weights for policy 0, policy_version 136982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:27,136][626795] Updated weights for policy 0, policy_version 136992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:28,893][626795] Updated weights for policy 0, policy_version 137002 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:28,976][24592] Fps is (10 sec: 45059.4, 60 sec: 40823.4, 300 sec: 41154.4). Total num frames: 1122320384. Throughput: 0: 10454.6. Samples: 30554472. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:28,977][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:30,628][626795] Updated weights for policy 0, policy_version 137012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:32,492][626795] Updated weights for policy 0, policy_version 137022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:33,975][24592] Fps is (10 sec: 45875.3, 60 sec: 40960.1, 300 sec: 41472.8). Total num frames: 1122549760. Throughput: 0: 10559.4. Samples: 30624474. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:33,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:34,253][626795] Updated weights for policy 0, policy_version 137032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:36,090][626795] Updated weights for policy 0, policy_version 137042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:37,959][626795] Updated weights for policy 0, policy_version 137052 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:38,975][24592] Fps is (10 sec: 45056.8, 60 sec: 42507.9, 300 sec: 41515.4). Total num frames: 1122770944. Throughput: 0: 10553.7. Samples: 30691428. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:38,976][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:39,752][626795] Updated weights for policy 0, policy_version 137062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:41,600][626795] Updated weights for policy 0, policy_version 137072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:43,520][626795] Updated weights for policy 0, policy_version 137082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:43,975][24592] Fps is (10 sec: 44236.9, 60 sec: 42734.9, 300 sec: 41487.7). Total num frames: 1122992128. Throughput: 0: 10512.5. Samples: 30724230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:43,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:45,442][626795] Updated weights for policy 0, policy_version 137092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:47,186][626795] Updated weights for policy 0, policy_version 137102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:50,165][24592] Fps is (10 sec: 34408.9, 60 sec: 40967.0, 300 sec: 41127.4). Total num frames: 1123155968. Throughput: 0: 10280.3. Samples: 30790176. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:50,167][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:51,542][626795] Updated weights for policy 0, policy_version 137112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:53,433][626795] Updated weights for policy 0, policy_version 137122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:53,975][24592] Fps is (10 sec: 32767.9, 60 sec: 40686.9, 300 sec: 41098.8). Total num frames: 1123319808. Throughput: 0: 9933.1. Samples: 30829146. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:53,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:55,379][626795] Updated weights for policy 0, policy_version 137132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:57,252][626795] Updated weights for policy 0, policy_version 137142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:58,975][24592] Fps is (10 sec: 43702.0, 60 sec: 40687.0, 300 sec: 41098.9). Total num frames: 1123540992. Throughput: 0: 10299.2. Samples: 30862080. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:08:58,977][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:08:59,029][626795] Updated weights for policy 0, policy_version 137152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:00,786][626795] Updated weights for policy 0, policy_version 137162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:02,644][626795] Updated weights for policy 0, policy_version 137172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 40960.2, 300 sec: 41126.6). Total num frames: 1123770368. Throughput: 0: 10544.6. Samples: 30929568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:03,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000137179_1123770368.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000135970_1113866240.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:04,421][626795] Updated weights for policy 0, policy_version 137182 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:06,226][626795] Updated weights for policy 0, policy_version 137192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:07,992][626795] Updated weights for policy 0, policy_version 137202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:08,977][24592] Fps is (10 sec: 45869.2, 60 sec: 41095.7, 300 sec: 41479.2). Total num frames: 1123999744. Throughput: 0: 10607.3. Samples: 30998100. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:08,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:09,780][626795] Updated weights for policy 0, policy_version 137212 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:11,546][626795] Updated weights for policy 0, policy_version 137222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:13,418][626795] Updated weights for policy 0, policy_version 137232 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:13,976][24592] Fps is (10 sec: 45051.8, 60 sec: 42878.3, 300 sec: 41487.5). Total num frames: 1124220928. Throughput: 0: 10614.9. Samples: 31032150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:13,979][24592] Avg episode reward: [(0, '4.307')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:15,371][626795] Updated weights for policy 0, policy_version 137242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:17,056][626795] Updated weights for policy 0, policy_version 137252 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:18,951][626795] Updated weights for policy 0, policy_version 137262 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:18,977][24592] Fps is (10 sec: 45056.0, 60 sec: 43007.8, 300 sec: 41543.2). Total num frames: 1124450304. Throughput: 0: 10533.6. Samples: 31098498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:18,978][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:20,937][626795] Updated weights for policy 0, policy_version 137272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:24,820][24592] Fps is (10 sec: 34751.9, 60 sec: 41199.3, 300 sec: 41175.6). Total num frames: 1124597760. Throughput: 0: 9599.3. Samples: 31131504. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:24,821][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:25,135][626795] Updated weights for policy 0, policy_version 137282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:26,996][626795] Updated weights for policy 0, policy_version 137292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:28,927][626795] Updated weights for policy 0, policy_version 137302 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:28,976][24592] Fps is (10 sec: 32770.6, 60 sec: 40959.9, 300 sec: 41154.3). Total num frames: 1124777984. Throughput: 0: 9921.6. Samples: 31170708. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:28,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:30,927][626795] Updated weights for policy 0, policy_version 137312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:32,720][626795] Updated weights for policy 0, policy_version 137322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:33,975][24592] Fps is (10 sec: 42948.7, 60 sec: 40687.0, 300 sec: 41154.5). Total num frames: 1124990976. Throughput: 0: 10171.0. Samples: 31235772. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:33,977][24592] Avg episode reward: [(0, '4.399')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:34,609][626795] Updated weights for policy 0, policy_version 137332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:36,227][626795] Updated weights for policy 0, policy_version 137342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:38,095][626795] Updated weights for policy 0, policy_version 137352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:38,975][24592] Fps is (10 sec: 45057.9, 60 sec: 40960.0, 300 sec: 41209.9). Total num frames: 1125228544. Throughput: 0: 10561.6. Samples: 31304418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:38,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:39,890][626795] Updated weights for policy 0, policy_version 137362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:41,593][626795] Updated weights for policy 0, policy_version 137372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:43,330][626795] Updated weights for policy 0, policy_version 137382 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:43,975][24592] Fps is (10 sec: 46693.9, 60 sec: 41096.5, 300 sec: 41510.7). Total num frames: 1125457920. Throughput: 0: 10593.3. Samples: 31338780. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:43,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:45,195][626795] Updated weights for policy 0, policy_version 137392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:46,911][626795] Updated weights for policy 0, policy_version 137402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:48,705][626795] Updated weights for policy 0, policy_version 137412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:48,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43042.3, 300 sec: 41543.2). Total num frames: 1125687296. Throughput: 0: 10639.5. Samples: 31408344. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:48,977][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:50,530][626795] Updated weights for policy 0, policy_version 137422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:52,424][626795] Updated weights for policy 0, policy_version 137432 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:53,975][24592] Fps is (10 sec: 44237.2, 60 sec: 43008.0, 300 sec: 41544.0). Total num frames: 1125900288. Throughput: 0: 10591.9. Samples: 31474722. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:53,976][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:54,280][626795] Updated weights for policy 0, policy_version 137442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:09:56,187][626795] Updated weights for policy 0, policy_version 137452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:59,652][24592] Fps is (10 sec: 32993.7, 60 sec: 41178.4, 300 sec: 41171.1). Total num frames: 1126039552. Throughput: 0: 10400.3. Samples: 31507188. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:09:59,653][24592] Avg episode reward: [(0, '4.290')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:00,637][626795] Updated weights for policy 0, policy_version 137462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:02,586][626795] Updated weights for policy 0, policy_version 137472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:03,975][24592] Fps is (10 sec: 31948.8, 60 sec: 40823.5, 300 sec: 41154.4). Total num frames: 1126219776. Throughput: 0: 9895.9. Samples: 31543800. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:03,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:04,538][626795] Updated weights for policy 0, policy_version 137482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:06,544][626795] Updated weights for policy 0, policy_version 137492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:08,321][626795] Updated weights for policy 0, policy_version 137502 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:08,975][24592] Fps is (10 sec: 43053.1, 60 sec: 40687.8, 300 sec: 41154.4). Total num frames: 1126440960. Throughput: 0: 10798.4. Samples: 31608312. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:08,977][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:10,134][626795] Updated weights for policy 0, policy_version 137512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:11,815][626795] Updated weights for policy 0, policy_version 137522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:13,730][626795] Updated weights for policy 0, policy_version 137532 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:13,975][24592] Fps is (10 sec: 45055.9, 60 sec: 40824.2, 300 sec: 41209.9). Total num frames: 1126670336. Throughput: 0: 10494.1. Samples: 31642938. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:13,978][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:15,493][626795] Updated weights for policy 0, policy_version 137542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:17,247][626795] Updated weights for policy 0, policy_version 137552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:18,975][24592] Fps is (10 sec: 45875.0, 60 sec: 40824.3, 300 sec: 41543.2). Total num frames: 1126899712. Throughput: 0: 10571.2. Samples: 31711476. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:18,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:18,982][626795] Updated weights for policy 0, policy_version 137562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:20,765][626795] Updated weights for policy 0, policy_version 137572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:22,660][626795] Updated weights for policy 0, policy_version 137582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:23,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42791.1, 300 sec: 41543.2). Total num frames: 1127129088. Throughput: 0: 10584.0. Samples: 31780698. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:23,977][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:24,349][626795] Updated weights for policy 0, policy_version 137592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:26,224][626795] Updated weights for policy 0, policy_version 137602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:28,140][626795] Updated weights for policy 0, policy_version 137612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:28,976][24592] Fps is (10 sec: 45055.2, 60 sec: 42871.6, 300 sec: 41515.4). Total num frames: 1127350272. Throughput: 0: 10563.7. Samples: 31814148. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:28,978][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:29,957][626795] Updated weights for policy 0, policy_version 137622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:31,846][626795] Updated weights for policy 0, policy_version 137632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:34,333][24592] Fps is (10 sec: 34007.8, 60 sec: 41259.9, 300 sec: 41132.3). Total num frames: 1127481344. Throughput: 0: 9671.4. Samples: 31847022. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:34,338][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:36,168][626795] Updated weights for policy 0, policy_version 137642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:38,052][626795] Updated weights for policy 0, policy_version 137652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:38,975][24592] Fps is (10 sec: 32768.7, 60 sec: 40823.5, 300 sec: 41099.0). Total num frames: 1127677952. Throughput: 0: 9866.8. Samples: 31918728. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:38,976][24592] Avg episode reward: [(0, '4.337')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:39,910][626795] Updated weights for policy 0, policy_version 137662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:41,878][626795] Updated weights for policy 0, policy_version 137672 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:43,882][626795] Updated weights for policy 0, policy_version 137682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:43,975][24592] Fps is (10 sec: 42480.6, 60 sec: 40550.4, 300 sec: 41098.8). Total num frames: 1127890944. Throughput: 0: 9989.9. Samples: 31949976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:43,976][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:45,634][626795] Updated weights for policy 0, policy_version 137692 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:47,377][626795] Updated weights for policy 0, policy_version 137702 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:48,975][24592] Fps is (10 sec: 44236.2, 60 sec: 40550.3, 300 sec: 41126.6). Total num frames: 1128120320. Throughput: 0: 10526.2. Samples: 32017482. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:48,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:49,148][626795] Updated weights for policy 0, policy_version 137712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:51,055][626795] Updated weights for policy 0, policy_version 137722 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:52,751][626795] Updated weights for policy 0, policy_version 137732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:53,975][24592] Fps is (10 sec: 45875.8, 60 sec: 40823.5, 300 sec: 41543.2). Total num frames: 1128349696. Throughput: 0: 10621.5. Samples: 32086278. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:53,977][24592] Avg episode reward: [(0, '4.240')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:54,463][626795] Updated weights for policy 0, policy_version 137742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:56,279][626795] Updated weights for policy 0, policy_version 137752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:58,180][626795] Updated weights for policy 0, policy_version 137762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:58,976][24592] Fps is (10 sec: 46694.5, 60 sec: 42945.9, 300 sec: 41598.7). Total num frames: 1128587264. Throughput: 0: 10623.3. Samples: 32120988. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:10:58,979][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:10:59,814][626795] Updated weights for policy 0, policy_version 137772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:01,734][626795] Updated weights for policy 0, policy_version 137782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:03,599][626795] Updated weights for policy 0, policy_version 137792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:03,976][24592] Fps is (10 sec: 45873.2, 60 sec: 43144.2, 300 sec: 41598.6). Total num frames: 1128808448. Throughput: 0: 10597.9. Samples: 32188386. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:03,977][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000137794_1128808448.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:04,038][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000136580_1118863360.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:05,455][626795] Updated weights for policy 0, policy_version 137802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:09,124][24592] Fps is (10 sec: 32287.2, 60 sec: 41130.9, 300 sec: 41216.9). Total num frames: 1128914944. Throughput: 0: 9747.9. Samples: 32220804. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:09,126][24592] Avg episode reward: [(0, '4.311')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:09,879][626795] Updated weights for policy 0, policy_version 137812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:11,896][626795] Updated weights for policy 0, policy_version 137822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:13,775][626795] Updated weights for policy 0, policy_version 137832 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:13,975][24592] Fps is (10 sec: 31949.9, 60 sec: 40960.0, 300 sec: 41210.2). Total num frames: 1129127936. Throughput: 0: 9873.0. Samples: 32258430. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:13,976][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:15,649][626795] Updated weights for policy 0, policy_version 137842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:17,575][626795] Updated weights for policy 0, policy_version 137852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:18,976][24592] Fps is (10 sec: 44070.5, 60 sec: 40822.9, 300 sec: 41209.8). Total num frames: 1129349120. Throughput: 0: 10656.6. Samples: 32322762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:18,978][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:19,317][626795] Updated weights for policy 0, policy_version 137862 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:21,102][626795] Updated weights for policy 0, policy_version 137872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:22,795][626795] Updated weights for policy 0, policy_version 137882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:23,976][24592] Fps is (10 sec: 45055.1, 60 sec: 40823.3, 300 sec: 41237.8). Total num frames: 1129578496. Throughput: 0: 10519.9. Samples: 32392128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:23,980][24592] Avg episode reward: [(0, '4.356')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:24,609][626795] Updated weights for policy 0, policy_version 137892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:26,362][626795] Updated weights for policy 0, policy_version 137902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:28,128][626795] Updated weights for policy 0, policy_version 137912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:28,975][24592] Fps is (10 sec: 45879.4, 60 sec: 40960.2, 300 sec: 41571.0). Total num frames: 1129807872. Throughput: 0: 10611.8. Samples: 32427504. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:28,976][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:29,826][626795] Updated weights for policy 0, policy_version 137922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:31,617][626795] Updated weights for policy 0, policy_version 137932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:33,392][626795] Updated weights for policy 0, policy_version 137942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:33,975][24592] Fps is (10 sec: 46695.5, 60 sec: 42991.5, 300 sec: 41627.1). Total num frames: 1130045440. Throughput: 0: 10665.2. Samples: 32497416. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:33,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:35,267][626795] Updated weights for policy 0, policy_version 137952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:37,147][626795] Updated weights for policy 0, policy_version 137962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:38,884][626795] Updated weights for policy 0, policy_version 137972 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:38,975][24592] Fps is (10 sec: 45874.9, 60 sec: 43144.5, 300 sec: 41654.2). Total num frames: 1130266624. Throughput: 0: 10618.4. Samples: 32564106. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:38,976][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:40,771][626795] Updated weights for policy 0, policy_version 137982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:43,975][24592] Fps is (10 sec: 32768.0, 60 sec: 41369.7, 300 sec: 41237.7). Total num frames: 1130373120. Throughput: 0: 10518.0. Samples: 32594298. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:43,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:45,184][626795] Updated weights for policy 0, policy_version 137992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:47,156][626795] Updated weights for policy 0, policy_version 138002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:48,922][626795] Updated weights for policy 0, policy_version 138012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:48,976][24592] Fps is (10 sec: 32767.6, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 1130594304. Throughput: 0: 9922.6. Samples: 32634900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:48,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:50,856][626795] Updated weights for policy 0, policy_version 138022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:52,691][626795] Updated weights for policy 0, policy_version 138032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:53,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 1130815488. Throughput: 0: 10717.0. Samples: 32701470. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:53,977][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:54,495][626795] Updated weights for policy 0, policy_version 138042 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:56,158][626795] Updated weights for policy 0, policy_version 138052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:57,998][626795] Updated weights for policy 0, policy_version 138062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:58,975][24592] Fps is (10 sec: 45056.4, 60 sec: 40960.0, 300 sec: 41293.2). Total num frames: 1131044864. Throughput: 0: 10621.9. Samples: 32736414. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:11:58,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:11:59,732][626795] Updated weights for policy 0, policy_version 138072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:01,501][626795] Updated weights for policy 0, policy_version 138082 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:03,208][626795] Updated weights for policy 0, policy_version 138092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:03,975][24592] Fps is (10 sec: 46694.1, 60 sec: 41233.3, 300 sec: 41682.0). Total num frames: 1131282432. Throughput: 0: 10733.8. Samples: 32805774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:03,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:05,124][626795] Updated weights for policy 0, policy_version 138102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:06,823][626795] Updated weights for policy 0, policy_version 138112 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:08,510][626795] Updated weights for policy 0, policy_version 138122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:08,975][24592] Fps is (10 sec: 45875.5, 60 sec: 43251.9, 300 sec: 41682.0). Total num frames: 1131503616. Throughput: 0: 10741.0. Samples: 32875470. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:08,977][24592] Avg episode reward: [(0, '4.340')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:10,491][626795] Updated weights for policy 0, policy_version 138132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:12,276][626795] Updated weights for policy 0, policy_version 138142 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:13,976][24592] Fps is (10 sec: 45055.0, 60 sec: 43417.5, 300 sec: 41709.7). Total num frames: 1131732992. Throughput: 0: 10687.5. Samples: 32908446. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:13,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:14,116][626795] Updated weights for policy 0, policy_version 138152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:15,979][626795] Updated weights for policy 0, policy_version 138162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:18,977][24592] Fps is (10 sec: 33583.1, 60 sec: 41505.9, 300 sec: 41293.0). Total num frames: 1131839488. Throughput: 0: 10248.3. Samples: 32958600. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:18,978][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:20,500][626795] Updated weights for policy 0, policy_version 138172 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:22,344][626795] Updated weights for policy 0, policy_version 138182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:23,975][24592] Fps is (10 sec: 31949.7, 60 sec: 41233.3, 300 sec: 41293.2). Total num frames: 1132052480. Throughput: 0: 9939.3. Samples: 33011376. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:23,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:24,296][626795] Updated weights for policy 0, policy_version 138192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:26,228][626795] Updated weights for policy 0, policy_version 138202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:27,990][626795] Updated weights for policy 0, policy_version 138212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:28,975][24592] Fps is (10 sec: 43423.1, 60 sec: 41096.5, 300 sec: 41293.3). Total num frames: 1132273664. Throughput: 0: 9978.7. Samples: 33043338. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:28,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:29,675][626795] Updated weights for policy 0, policy_version 138222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:31,497][626795] Updated weights for policy 0, policy_version 138232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:33,150][626795] Updated weights for policy 0, policy_version 138242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:33,975][24592] Fps is (10 sec: 45875.4, 60 sec: 41096.6, 300 sec: 41657.9). Total num frames: 1132511232. Throughput: 0: 10630.2. Samples: 33113256. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:33,977][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:35,050][626795] Updated weights for policy 0, policy_version 138252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:36,667][626795] Updated weights for policy 0, policy_version 138262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:38,458][626795] Updated weights for policy 0, policy_version 138272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:38,976][24592] Fps is (10 sec: 46691.5, 60 sec: 41232.7, 300 sec: 41737.5). Total num frames: 1132740608. Throughput: 0: 10708.4. Samples: 33183354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:38,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:40,255][626795] Updated weights for policy 0, policy_version 138282 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:42,075][626795] Updated weights for policy 0, policy_version 138292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:43,761][626795] Updated weights for policy 0, policy_version 138302 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:43,976][24592] Fps is (10 sec: 45873.3, 60 sec: 43280.8, 300 sec: 41765.3). Total num frames: 1132969984. Throughput: 0: 10702.1. Samples: 33218010. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:43,977][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:45,645][626795] Updated weights for policy 0, policy_version 138312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:47,484][626795] Updated weights for policy 0, policy_version 138322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:48,975][24592] Fps is (10 sec: 45058.7, 60 sec: 43281.2, 300 sec: 41737.5). Total num frames: 1133191168. Throughput: 0: 10677.9. Samples: 33286278. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:48,978][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:49,396][626795] Updated weights for policy 0, policy_version 138332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:53,691][626795] Updated weights for policy 0, policy_version 138342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:53,975][24592] Fps is (10 sec: 33588.4, 60 sec: 41506.1, 300 sec: 41376.5). Total num frames: 1133305856. Throughput: 0: 9988.8. Samples: 33324966. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:53,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:55,642][626795] Updated weights for policy 0, policy_version 138352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:57,535][626795] Updated weights for policy 0, policy_version 138362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:58,976][24592] Fps is (10 sec: 32765.7, 60 sec: 41232.6, 300 sec: 41376.5). Total num frames: 1133518848. Throughput: 0: 9964.2. Samples: 33356838. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:12:58,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:12:59,447][626795] Updated weights for policy 0, policy_version 138372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:01,277][626795] Updated weights for policy 0, policy_version 138382 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:03,039][626795] Updated weights for policy 0, policy_version 138392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:03,975][24592] Fps is (10 sec: 44237.3, 60 sec: 41096.6, 300 sec: 41404.3). Total num frames: 1133748224. Throughput: 0: 10324.4. Samples: 33423186. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:03,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000138397_1133748224.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:04,032][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000137179_1123770368.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:04,874][626795] Updated weights for policy 0, policy_version 138402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:06,545][626795] Updated weights for policy 0, policy_version 138412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,283][626772] Signal inference workers to stop experience collection... (500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,284][626772] Signal inference workers to resume experience collection... (500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,299][626795] InferenceWorker_p0-w0: stopping experience collection (500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,301][626795] InferenceWorker_p0-w0: resuming experience collection (500 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:08,321][626795] Updated weights for policy 0, policy_version 138422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,975][24592] Fps is (10 sec: 45878.2, 60 sec: 41233.0, 300 sec: 41788.9). Total num frames: 1133977600. Throughput: 0: 10685.7. Samples: 33492234. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:08,979][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:10,119][626795] Updated weights for policy 0, policy_version 138432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:11,839][626795] Updated weights for policy 0, policy_version 138442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:13,650][626795] Updated weights for policy 0, policy_version 138452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:13,975][24592] Fps is (10 sec: 46694.2, 60 sec: 41369.8, 300 sec: 41848.8). Total num frames: 1134215168. Throughput: 0: 10751.7. Samples: 33527166. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:13,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:15,376][626795] Updated weights for policy 0, policy_version 138462 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:17,240][626795] Updated weights for policy 0, policy_version 138472 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:18,976][24592] Fps is (10 sec: 45873.2, 60 sec: 43281.6, 300 sec: 41848.6). Total num frames: 1134436352. Throughput: 0: 10742.1. Samples: 33596658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:18,977][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:19,015][626795] Updated weights for policy 0, policy_version 138482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:20,842][626795] Updated weights for policy 0, policy_version 138492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:22,677][626795] Updated weights for policy 0, policy_version 138502 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:23,976][24592] Fps is (10 sec: 44235.4, 60 sec: 43417.4, 300 sec: 41820.8). Total num frames: 1134657536. Throughput: 0: 10671.9. Samples: 33663588. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:23,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:24,593][626795] Updated weights for policy 0, policy_version 138512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:28,925][626795] Updated weights for policy 0, policy_version 138522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:28,975][24592] Fps is (10 sec: 33588.7, 60 sec: 41642.6, 300 sec: 41432.1). Total num frames: 1134772224. Throughput: 0: 10379.8. Samples: 33685098. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:28,977][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:30,865][626795] Updated weights for policy 0, policy_version 138532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:32,754][626795] Updated weights for policy 0, policy_version 138542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:33,975][24592] Fps is (10 sec: 32768.8, 60 sec: 41233.0, 300 sec: 41404.3). Total num frames: 1134985216. Throughput: 0: 9944.9. Samples: 33733800. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:33,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:34,693][626795] Updated weights for policy 0, policy_version 138552 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:36,430][626795] Updated weights for policy 0, policy_version 138562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:38,216][626795] Updated weights for policy 0, policy_version 138572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:38,975][24592] Fps is (10 sec: 44237.1, 60 sec: 41233.5, 300 sec: 41432.1). Total num frames: 1135214592. Throughput: 0: 10586.7. Samples: 33801366. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:38,976][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:40,035][626795] Updated weights for policy 0, policy_version 138582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:41,665][626795] Updated weights for policy 0, policy_version 138592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:43,491][626795] Updated weights for policy 0, policy_version 138602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:43,976][24592] Fps is (10 sec: 45874.1, 60 sec: 41233.2, 300 sec: 41822.9). Total num frames: 1135443968. Throughput: 0: 10651.0. Samples: 33836130. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:43,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:45,281][626795] Updated weights for policy 0, policy_version 138612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:47,001][626795] Updated weights for policy 0, policy_version 138622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:48,803][626795] Updated weights for policy 0, policy_version 138632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:48,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41506.2, 300 sec: 41904.2). Total num frames: 1135681536. Throughput: 0: 10739.2. Samples: 33906450. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:48,976][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:50,514][626795] Updated weights for policy 0, policy_version 138642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:52,313][626795] Updated weights for policy 0, policy_version 138652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:53,975][24592] Fps is (10 sec: 46695.6, 60 sec: 43417.6, 300 sec: 41931.9). Total num frames: 1135910912. Throughput: 0: 10738.1. Samples: 33975450. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:53,978][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:54,136][626795] Updated weights for policy 0, policy_version 138662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:55,899][626795] Updated weights for policy 0, policy_version 138672 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:57,765][626795] Updated weights for policy 0, policy_version 138682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:58,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43554.6, 300 sec: 41904.2). Total num frames: 1136132096. Throughput: 0: 10700.5. Samples: 34008690. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:13:58,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:13:59,542][626795] Updated weights for policy 0, policy_version 138692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:03,975][24592] Fps is (10 sec: 32768.1, 60 sec: 41506.1, 300 sec: 41487.8). Total num frames: 1136238592. Throughput: 0: 10073.2. Samples: 34049946. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:03,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:04,081][626795] Updated weights for policy 0, policy_version 138702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:05,931][626795] Updated weights for policy 0, policy_version 138712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:07,857][626795] Updated weights for policy 0, policy_version 138722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:08,975][24592] Fps is (10 sec: 32767.9, 60 sec: 41369.6, 300 sec: 41487.8). Total num frames: 1136459776. Throughput: 0: 9973.1. Samples: 34112376. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:08,978][24592] Avg episode reward: [(0, '4.288')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:09,714][626795] Updated weights for policy 0, policy_version 138732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:11,586][626795] Updated weights for policy 0, policy_version 138742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:13,304][626795] Updated weights for policy 0, policy_version 138752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:13,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41096.5, 300 sec: 41460.0). Total num frames: 1136680960. Throughput: 0: 10227.3. Samples: 34145328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:13,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:15,092][626795] Updated weights for policy 0, policy_version 138762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:16,802][626795] Updated weights for policy 0, policy_version 138772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:18,628][626795] Updated weights for policy 0, policy_version 138782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:18,976][24592] Fps is (10 sec: 45054.7, 60 sec: 41233.2, 300 sec: 41857.3). Total num frames: 1136910336. Throughput: 0: 10692.7. Samples: 34214976. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:18,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:20,435][626795] Updated weights for policy 0, policy_version 138792 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:22,088][626795] Updated weights for policy 0, policy_version 138802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:23,880][626795] Updated weights for policy 0, policy_version 138812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:23,975][24592] Fps is (10 sec: 46694.2, 60 sec: 41506.3, 300 sec: 41932.0). Total num frames: 1137147904. Throughput: 0: 10747.2. Samples: 34284990. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:23,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:25,678][626795] Updated weights for policy 0, policy_version 138822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:27,437][626795] Updated weights for policy 0, policy_version 138832 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:28,975][24592] Fps is (10 sec: 46695.7, 60 sec: 43417.6, 300 sec: 41987.5). Total num frames: 1137377280. Throughput: 0: 10742.6. Samples: 34319544. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:28,977][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:29,277][626795] Updated weights for policy 0, policy_version 138842 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:31,196][626795] Updated weights for policy 0, policy_version 138852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:33,002][626795] Updated weights for policy 0, policy_version 138862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:33,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43554.1, 300 sec: 41931.9). Total num frames: 1137598464. Throughput: 0: 10656.9. Samples: 34386012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:33,977][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:34,929][626795] Updated weights for policy 0, policy_version 138872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:38,975][24592] Fps is (10 sec: 32768.1, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 1137704960. Throughput: 0: 9977.1. Samples: 34424418. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:38,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:39,281][626795] Updated weights for policy 0, policy_version 138882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:41,213][626795] Updated weights for policy 0, policy_version 138892 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:43,033][626795] Updated weights for policy 0, policy_version 138902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:43,976][24592] Fps is (10 sec: 31948.3, 60 sec: 41233.1, 300 sec: 41459.8). Total num frames: 1137917952. Throughput: 0: 9961.3. Samples: 34456950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:43,976][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:44,929][626795] Updated weights for policy 0, policy_version 138912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:46,692][626795] Updated weights for policy 0, policy_version 138922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:48,483][626795] Updated weights for policy 0, policy_version 138932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:48,975][24592] Fps is (10 sec: 45056.2, 60 sec: 41233.1, 300 sec: 41543.2). Total num frames: 1138155520. Throughput: 0: 10540.0. Samples: 34524246. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:48,976][24592] Avg episode reward: [(0, '4.291')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:50,213][626795] Updated weights for policy 0, policy_version 138942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:51,990][626795] Updated weights for policy 0, policy_version 138952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:53,780][626795] Updated weights for policy 0, policy_version 138962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:53,975][24592] Fps is (10 sec: 46695.2, 60 sec: 41233.1, 300 sec: 41944.8). Total num frames: 1138384896. Throughput: 0: 10695.9. Samples: 34593690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:53,976][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:55,550][626795] Updated weights for policy 0, policy_version 138972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:57,266][626795] Updated weights for policy 0, policy_version 138982 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:58,975][24592] Fps is (10 sec: 45874.9, 60 sec: 41369.6, 300 sec: 42015.2). Total num frames: 1138614272. Throughput: 0: 10728.9. Samples: 34628130. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:14:58,977][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:14:59,134][626795] Updated weights for policy 0, policy_version 138992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:00,891][626795] Updated weights for policy 0, policy_version 139002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:02,637][626795] Updated weights for policy 0, policy_version 139012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 43281.0, 300 sec: 42015.2). Total num frames: 1138835456. Throughput: 0: 10711.1. Samples: 34696974. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:03,978][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000139019_1138843648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:04,069][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000137794_1128808448.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:04,535][626795] Updated weights for policy 0, policy_version 139022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:06,368][626795] Updated weights for policy 0, policy_version 139032 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:08,296][626795] Updated weights for policy 0, policy_version 139042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:08,976][24592] Fps is (10 sec: 44235.1, 60 sec: 43280.8, 300 sec: 41987.4). Total num frames: 1139056640. Throughput: 0: 10625.9. Samples: 34763160. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:08,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:12,624][626795] Updated weights for policy 0, policy_version 139052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:13,975][24592] Fps is (10 sec: 33587.5, 60 sec: 41506.1, 300 sec: 41598.7). Total num frames: 1139171328. Throughput: 0: 10158.9. Samples: 34776696. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:13,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:14,606][626795] Updated weights for policy 0, policy_version 139062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:16,466][626795] Updated weights for policy 0, policy_version 139072 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:18,356][626795] Updated weights for policy 0, policy_version 139082 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:18,975][24592] Fps is (10 sec: 31949.8, 60 sec: 41096.7, 300 sec: 41515.4). Total num frames: 1139376128. Throughput: 0: 9930.0. Samples: 34832862. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:18,976][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:20,181][626795] Updated weights for policy 0, policy_version 139092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:21,931][626795] Updated weights for policy 0, policy_version 139102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:23,656][626795] Updated weights for policy 0, policy_version 139112 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:23,975][24592] Fps is (10 sec: 44236.6, 60 sec: 41096.5, 300 sec: 41571.0). Total num frames: 1139613696. Throughput: 0: 10595.2. Samples: 34901202. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:23,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:25,522][626795] Updated weights for policy 0, policy_version 139122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:27,265][626795] Updated weights for policy 0, policy_version 139132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:28,970][626795] Updated weights for policy 0, policy_version 139142 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:28,975][24592] Fps is (10 sec: 47514.1, 60 sec: 41233.1, 300 sec: 41982.9). Total num frames: 1139851264. Throughput: 0: 10642.4. Samples: 34935858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:28,977][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:30,755][626795] Updated weights for policy 0, policy_version 139152 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:32,537][626795] Updated weights for policy 0, policy_version 139162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:33,976][24592] Fps is (10 sec: 47511.6, 60 sec: 41505.8, 300 sec: 42070.7). Total num frames: 1140088832. Throughput: 0: 10708.0. Samples: 35006112. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:33,978][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:34,381][626795] Updated weights for policy 0, policy_version 139172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:36,031][626795] Updated weights for policy 0, policy_version 139182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:37,871][626795] Updated weights for policy 0, policy_version 139192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:38,975][24592] Fps is (10 sec: 45055.4, 60 sec: 43281.0, 300 sec: 42070.8). Total num frames: 1140301824. Throughput: 0: 10678.4. Samples: 35074218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:38,978][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:39,750][626795] Updated weights for policy 0, policy_version 139202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:41,558][626795] Updated weights for policy 0, policy_version 139212 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:43,397][626795] Updated weights for policy 0, policy_version 139222 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:43,976][24592] Fps is (10 sec: 43418.3, 60 sec: 43417.5, 300 sec: 42043.0). Total num frames: 1140523008. Throughput: 0: 10664.6. Samples: 35108040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:43,976][24592] Avg episode reward: [(0, '4.286')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:47,827][626795] Updated weights for policy 0, policy_version 139232 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:48,975][24592] Fps is (10 sec: 32768.4, 60 sec: 41233.0, 300 sec: 41626.5). Total num frames: 1140629504. Throughput: 0: 9966.7. Samples: 35145474. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:48,978][24592] Avg episode reward: [(0, '4.232')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:49,981][626795] Updated weights for policy 0, policy_version 139242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:51,882][626795] Updated weights for policy 0, policy_version 139252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:53,773][626795] Updated weights for policy 0, policy_version 139262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:53,975][24592] Fps is (10 sec: 31949.5, 60 sec: 40960.0, 300 sec: 41543.2). Total num frames: 1140842496. Throughput: 0: 9882.3. Samples: 35207862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:53,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:55,619][626795] Updated weights for policy 0, policy_version 139272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:57,291][626795] Updated weights for policy 0, policy_version 139282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:58,975][24592] Fps is (10 sec: 44236.5, 60 sec: 40959.9, 300 sec: 41571.0). Total num frames: 1141071872. Throughput: 0: 10345.3. Samples: 35242236. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:15:58,977][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:15:59,046][626795] Updated weights for policy 0, policy_version 139292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:00,849][626795] Updated weights for policy 0, policy_version 139302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:02,615][626795] Updated weights for policy 0, policy_version 139312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:03,975][24592] Fps is (10 sec: 45875.5, 60 sec: 41096.6, 300 sec: 42008.7). Total num frames: 1141301248. Throughput: 0: 10661.9. Samples: 35312646. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:03,979][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:04,252][626795] Updated weights for policy 0, policy_version 139322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:06,132][626795] Updated weights for policy 0, policy_version 139332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:07,920][626795] Updated weights for policy 0, policy_version 139342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:08,976][24592] Fps is (10 sec: 46693.6, 60 sec: 41369.7, 300 sec: 42070.8). Total num frames: 1141538816. Throughput: 0: 10682.7. Samples: 35381928. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:08,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:09,625][626795] Updated weights for policy 0, policy_version 139352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:11,430][626795] Updated weights for policy 0, policy_version 139362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:13,302][626795] Updated weights for policy 0, policy_version 139372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:13,976][24592] Fps is (10 sec: 45872.9, 60 sec: 43144.2, 300 sec: 42070.8). Total num frames: 1141760000. Throughput: 0: 10676.7. Samples: 35416314. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:13,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:15,234][626795] Updated weights for policy 0, policy_version 139382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:16,917][626795] Updated weights for policy 0, policy_version 139392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:18,792][626795] Updated weights for policy 0, policy_version 139402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:18,976][24592] Fps is (10 sec: 45056.4, 60 sec: 43554.1, 300 sec: 42070.8). Total num frames: 1141989376. Throughput: 0: 10618.7. Samples: 35483952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:18,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:23,359][626795] Updated weights for policy 0, policy_version 139412 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:23,975][24592] Fps is (10 sec: 32769.4, 60 sec: 41233.0, 300 sec: 41626.5). Total num frames: 1142087680. Throughput: 0: 9910.3. Samples: 35520180. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:23,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:25,097][626795] Updated weights for policy 0, policy_version 139422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:27,031][626795] Updated weights for policy 0, policy_version 139432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:28,717][626795] Updated weights for policy 0, policy_version 139442 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:28,975][24592] Fps is (10 sec: 32768.8, 60 sec: 41096.6, 300 sec: 41598.7). Total num frames: 1142317056. Throughput: 0: 9889.7. Samples: 35553072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:28,976][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:30,411][626795] Updated weights for policy 0, policy_version 139452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:32,186][626795] Updated weights for policy 0, policy_version 139462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:33,829][626795] Updated weights for policy 0, policy_version 139472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:33,976][24592] Fps is (10 sec: 46693.6, 60 sec: 41096.7, 300 sec: 41654.2). Total num frames: 1142554624. Throughput: 0: 10650.5. Samples: 35624748. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:33,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:35,563][626795] Updated weights for policy 0, policy_version 139482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:37,185][626795] Updated weights for policy 0, policy_version 139492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:38,931][626795] Updated weights for policy 0, policy_version 139502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:38,975][24592] Fps is (10 sec: 48332.4, 60 sec: 41642.8, 300 sec: 42126.3). Total num frames: 1142800384. Throughput: 0: 10866.1. Samples: 35696838. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:38,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:40,748][626795] Updated weights for policy 0, policy_version 139512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:42,448][626795] Updated weights for policy 0, policy_version 139522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:43,975][24592] Fps is (10 sec: 47514.7, 60 sec: 41779.4, 300 sec: 42154.1). Total num frames: 1143029760. Throughput: 0: 10887.1. Samples: 35732154. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:43,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:44,176][626795] Updated weights for policy 0, policy_version 139532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:45,974][626795] Updated weights for policy 0, policy_version 139542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:47,872][626795] Updated weights for policy 0, policy_version 139552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:48,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43827.2, 300 sec: 42181.9). Total num frames: 1143259136. Throughput: 0: 10855.1. Samples: 35801124. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:48,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:49,655][626795] Updated weights for policy 0, policy_version 139562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:51,437][626795] Updated weights for policy 0, policy_version 139572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:53,239][626795] Updated weights for policy 0, policy_version 139582 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:53,989][24592] Fps is (10 sec: 44994.5, 60 sec: 43953.8, 300 sec: 42152.1). Total num frames: 1143480320. Throughput: 0: 10808.4. Samples: 35868450. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:53,990][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:57,495][626795] Updated weights for policy 0, policy_version 139592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:58,976][24592] Fps is (10 sec: 33585.8, 60 sec: 42052.1, 300 sec: 41737.5). Total num frames: 1143595008. Throughput: 0: 10203.2. Samples: 35875458. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:16:58,977][24592] Avg episode reward: [(0, '4.391')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:16:59,390][626795] Updated weights for policy 0, policy_version 139602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:01,243][626795] Updated weights for policy 0, policy_version 139612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:03,006][626795] Updated weights for policy 0, policy_version 139622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:03,975][24592] Fps is (10 sec: 34453.3, 60 sec: 42052.2, 300 sec: 41765.3). Total num frames: 1143824384. Throughput: 0: 10184.7. Samples: 35942262. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:03,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000139628_1143832576.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:04,044][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000138397_1133748224.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:04,739][626795] Updated weights for policy 0, policy_version 139632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:06,428][626795] Updated weights for policy 0, policy_version 139642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:08,127][626795] Updated weights for policy 0, policy_version 139652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:08,975][24592] Fps is (10 sec: 47515.5, 60 sec: 42189.0, 300 sec: 41820.9). Total num frames: 1144070144. Throughput: 0: 10988.9. Samples: 36014682. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:08,976][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:09,800][626795] Updated weights for policy 0, policy_version 139662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:11,502][626795] Updated weights for policy 0, policy_version 139672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:13,181][626795] Updated weights for policy 0, policy_version 139682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:13,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42462.2, 300 sec: 42265.3). Total num frames: 1144307712. Throughput: 0: 11055.6. Samples: 36050574. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:13,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:14,946][626795] Updated weights for policy 0, policy_version 139692 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:16,529][626795] Updated weights for policy 0, policy_version 139702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:18,354][626795] Updated weights for policy 0, policy_version 139712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:18,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.5, 300 sec: 42348.5). Total num frames: 1144545280. Throughput: 0: 11081.4. Samples: 36123408. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:18,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:20,005][626795] Updated weights for policy 0, policy_version 139722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:21,815][626795] Updated weights for policy 0, policy_version 139732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:23,499][626795] Updated weights for policy 0, policy_version 139742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:23,975][24592] Fps is (10 sec: 47513.0, 60 sec: 44919.4, 300 sec: 42404.0). Total num frames: 1144782848. Throughput: 0: 11031.7. Samples: 36193266. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:23,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:25,347][626795] Updated weights for policy 0, policy_version 139752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:27,156][626795] Updated weights for policy 0, policy_version 139762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:31,049][24592] Fps is (10 sec: 36639.2, 60 sec: 43022.9, 300 sec: 41997.7). Total num frames: 1144987648. Throughput: 0: 10523.0. Samples: 36227508. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:31,050][24592] Avg episode reward: [(0, '4.297')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:31,477][626795] Updated weights for policy 0, policy_version 139772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:33,407][626795] Updated weights for policy 0, policy_version 139782 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:33,975][24592] Fps is (10 sec: 32768.3, 60 sec: 42598.5, 300 sec: 41932.0). Total num frames: 1145110528. Throughput: 0: 10336.9. Samples: 36266286. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:33,976][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:35,264][626795] Updated weights for policy 0, policy_version 139792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:37,046][626795] Updated weights for policy 0, policy_version 139802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:38,795][626795] Updated weights for policy 0, policy_version 139812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:38,975][24592] Fps is (10 sec: 45474.5, 60 sec: 42461.9, 300 sec: 41959.8). Total num frames: 1145348096. Throughput: 0: 10351.0. Samples: 36334104. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:38,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:40,460][626795] Updated weights for policy 0, policy_version 139822 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:42,100][626795] Updated weights for policy 0, policy_version 139832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:43,898][626795] Updated weights for policy 0, policy_version 139842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:43,976][24592] Fps is (10 sec: 47512.2, 60 sec: 42598.2, 300 sec: 42015.2). Total num frames: 1145585664. Throughput: 0: 11007.1. Samples: 36370776. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:43,977][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:45,616][626795] Updated weights for policy 0, policy_version 139852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:47,279][626795] Updated weights for policy 0, policy_version 139862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:48,943][626795] Updated weights for policy 0, policy_version 139872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:48,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42871.5, 300 sec: 42459.6). Total num frames: 1145831424. Throughput: 0: 11121.2. Samples: 36442716. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:48,976][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:50,582][626795] Updated weights for policy 0, policy_version 139882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:52,354][626795] Updated weights for policy 0, policy_version 139892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:53,976][24592] Fps is (10 sec: 48333.5, 60 sec: 43154.2, 300 sec: 42542.9). Total num frames: 1146068992. Throughput: 0: 11097.8. Samples: 36514086. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:53,976][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:54,100][626795] Updated weights for policy 0, policy_version 139902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:55,847][626795] Updated weights for policy 0, policy_version 139912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:57,690][626795] Updated weights for policy 0, policy_version 139922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:58,976][24592] Fps is (10 sec: 46693.3, 60 sec: 45056.1, 300 sec: 42542.8). Total num frames: 1146298368. Throughput: 0: 11084.7. Samples: 36549390. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:17:58,977][24592] Avg episode reward: [(0, '4.223')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:17:59,444][626795] Updated weights for policy 0, policy_version 139932 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:01,310][626795] Updated weights for policy 0, policy_version 139942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:03,143][626795] Updated weights for policy 0, policy_version 139952 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:05,577][24592] Fps is (10 sec: 36013.3, 60 sec: 43220.0, 300 sec: 42175.1). Total num frames: 1146486784. Throughput: 0: 10587.7. Samples: 36616806. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:05,578][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:07,365][626795] Updated weights for policy 0, policy_version 139962 (0.0035)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:08,976][24592] Fps is (10 sec: 33584.8, 60 sec: 42734.2, 300 sec: 42098.4). Total num frames: 1146634240. Throughput: 0: 10283.7. Samples: 36656040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:08,978][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:09,393][626795] Updated weights for policy 0, policy_version 139972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:11,029][626795] Updated weights for policy 0, policy_version 139982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:12,793][626795] Updated weights for policy 0, policy_version 139992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:13,976][24592] Fps is (10 sec: 44865.0, 60 sec: 42598.1, 300 sec: 42126.3). Total num frames: 1146863616. Throughput: 0: 10780.0. Samples: 36690258. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:13,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:14,544][626795] Updated weights for policy 0, policy_version 140002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:16,195][626795] Updated weights for policy 0, policy_version 140012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:17,906][626795] Updated weights for policy 0, policy_version 140022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:18,976][24592] Fps is (10 sec: 47515.5, 60 sec: 42734.5, 300 sec: 42209.6). Total num frames: 1147109376. Throughput: 0: 11038.4. Samples: 36763020. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:18,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:19,626][626795] Updated weights for policy 0, policy_version 140032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:21,197][626795] Updated weights for policy 0, policy_version 140042 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:22,967][626795] Updated weights for policy 0, policy_version 140052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:23,975][24592] Fps is (10 sec: 48335.1, 60 sec: 42735.0, 300 sec: 42626.2). Total num frames: 1147346944. Throughput: 0: 11133.6. Samples: 36835116. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:23,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:24,705][626795] Updated weights for policy 0, policy_version 140062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:26,339][626795] Updated weights for policy 0, policy_version 140072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:28,085][626795] Updated weights for policy 0, policy_version 140082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:28,975][24592] Fps is (10 sec: 48335.6, 60 sec: 44971.9, 300 sec: 42737.3). Total num frames: 1147592704. Throughput: 0: 11125.7. Samples: 36871428. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:28,976][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:29,902][626795] Updated weights for policy 0, policy_version 140092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:31,710][626795] Updated weights for policy 0, policy_version 140102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:33,526][626795] Updated weights for policy 0, policy_version 140112 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:33,975][24592] Fps is (10 sec: 46694.2, 60 sec: 45056.0, 300 sec: 42709.5). Total num frames: 1147813888. Throughput: 0: 11054.5. Samples: 36940170. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:33,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:35,193][626795] Updated weights for policy 0, policy_version 140122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:37,245][626795] Updated weights for policy 0, policy_version 140132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:39,959][24592] Fps is (10 sec: 35053.2, 60 sec: 43120.1, 300 sec: 42346.1). Total num frames: 1147977728. Throughput: 0: 10009.4. Samples: 36974358. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:39,960][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:41,324][626795] Updated weights for policy 0, policy_version 140142 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:43,180][626795] Updated weights for policy 0, policy_version 140152 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:43,976][24592] Fps is (10 sec: 34405.3, 60 sec: 42871.5, 300 sec: 42292.9). Total num frames: 1148157952. Throughput: 0: 10352.5. Samples: 37015254. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:43,976][24592] Avg episode reward: [(0, '4.254')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:44,875][626795] Updated weights for policy 0, policy_version 140162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:46,688][626795] Updated weights for policy 0, policy_version 140172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:48,423][626795] Updated weights for policy 0, policy_version 140182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:48,976][24592] Fps is (10 sec: 46336.4, 60 sec: 42734.6, 300 sec: 42320.6). Total num frames: 1148395520. Throughput: 0: 10783.9. Samples: 37084818. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:48,978][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:50,135][626795] Updated weights for policy 0, policy_version 140192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:51,754][626795] Updated weights for policy 0, policy_version 140202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:53,434][626795] Updated weights for policy 0, policy_version 140212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:53,975][24592] Fps is (10 sec: 48334.0, 60 sec: 42871.5, 300 sec: 42404.0). Total num frames: 1148641280. Throughput: 0: 11137.2. Samples: 37157202. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:53,977][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:55,151][626795] Updated weights for policy 0, policy_version 140222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:56,972][626795] Updated weights for policy 0, policy_version 140232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:18:58,550][626795] Updated weights for policy 0, policy_version 140242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:58,975][24592] Fps is (10 sec: 48335.2, 60 sec: 43008.2, 300 sec: 42848.3). Total num frames: 1148878848. Throughput: 0: 11180.3. Samples: 37193364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:18:58,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:00,187][626795] Updated weights for policy 0, policy_version 140252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:01,953][626795] Updated weights for policy 0, policy_version 140262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:02,720][626772] Signal inference workers to stop experience collection... (550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:02,721][626772] Signal inference workers to resume experience collection... (550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:02,736][626795] InferenceWorker_p0-w0: stopping experience collection (550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:02,736][626795] InferenceWorker_p0-w0: resuming experience collection (550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:03,754][626795] Updated weights for policy 0, policy_version 140272 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:03,976][24592] Fps is (10 sec: 46691.0, 60 sec: 44888.0, 300 sec: 42876.0). Total num frames: 1149108224. Throughput: 0: 11164.2. Samples: 37265412. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:03,982][24592] Avg episode reward: [(0, '4.159')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:04,005][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000140273_1149116416.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000139019_1138843648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:05,633][626795] Updated weights for policy 0, policy_version 140282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:07,505][626795] Updated weights for policy 0, policy_version 140292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:08,975][24592] Fps is (10 sec: 45874.8, 60 sec: 45056.7, 300 sec: 42903.9). Total num frames: 1149337600. Throughput: 0: 11055.2. Samples: 37332600. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:08,977][24592] Avg episode reward: [(0, '4.333')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:09,154][626795] Updated weights for policy 0, policy_version 140302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:11,127][626795] Updated weights for policy 0, policy_version 140312 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:14,280][24592] Fps is (10 sec: 35777.6, 60 sec: 43334.6, 300 sec: 42554.5). Total num frames: 1149476864. Throughput: 0: 10900.0. Samples: 37365246. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:14,282][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:15,224][626795] Updated weights for policy 0, policy_version 140322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:17,109][626795] Updated weights for policy 0, policy_version 140332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:18,881][626795] Updated weights for policy 0, policy_version 140342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:18,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42871.8, 300 sec: 42487.3). Total num frames: 1149681664. Throughput: 0: 10380.7. Samples: 37407300. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:18,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:20,575][626795] Updated weights for policy 0, policy_version 140352 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:22,255][626795] Updated weights for policy 0, policy_version 140362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:23,975][24592] Fps is (10 sec: 45625.9, 60 sec: 42871.5, 300 sec: 42515.1). Total num frames: 1149919232. Throughput: 0: 11456.8. Samples: 37478640. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:23,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:23,985][626795] Updated weights for policy 0, policy_version 140372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:25,708][626795] Updated weights for policy 0, policy_version 140382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:27,313][626795] Updated weights for policy 0, policy_version 140392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:28,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42871.4, 300 sec: 42598.4). Total num frames: 1150164992. Throughput: 0: 11107.8. Samples: 37515102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:28,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:29,075][626795] Updated weights for policy 0, policy_version 140402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:30,693][626795] Updated weights for policy 0, policy_version 140412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:32,396][626795] Updated weights for policy 0, policy_version 140422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:33,975][24592] Fps is (10 sec: 48332.9, 60 sec: 43144.6, 300 sec: 43042.7). Total num frames: 1150402560. Throughput: 0: 11179.3. Samples: 37587882. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:33,980][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:34,115][626795] Updated weights for policy 0, policy_version 140432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:35,836][626795] Updated weights for policy 0, policy_version 140442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:37,785][626795] Updated weights for policy 0, policy_version 140452 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:38,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44974.3, 300 sec: 43098.3). Total num frames: 1150631936. Throughput: 0: 11081.7. Samples: 37655880. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:38,978][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:39,711][626795] Updated weights for policy 0, policy_version 140462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:41,526][626795] Updated weights for policy 0, policy_version 140472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:43,334][626795] Updated weights for policy 0, policy_version 140482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:43,976][24592] Fps is (10 sec: 45054.4, 60 sec: 44919.5, 300 sec: 43042.7). Total num frames: 1150853120. Throughput: 0: 11017.5. Samples: 37689156. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:43,978][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:45,167][626795] Updated weights for policy 0, policy_version 140492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:48,975][24592] Fps is (10 sec: 35225.8, 60 sec: 43144.8, 300 sec: 42709.5). Total num frames: 1150984192. Throughput: 0: 10606.0. Samples: 37742676. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:48,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:49,187][626795] Updated weights for policy 0, policy_version 140502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:50,963][626795] Updated weights for policy 0, policy_version 140512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:52,731][626795] Updated weights for policy 0, policy_version 140522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:53,975][24592] Fps is (10 sec: 35226.8, 60 sec: 42735.0, 300 sec: 42681.7). Total num frames: 1151205376. Throughput: 0: 10374.4. Samples: 37799448. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:53,977][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:54,451][626795] Updated weights for policy 0, policy_version 140532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:56,237][626795] Updated weights for policy 0, policy_version 140542 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:57,897][626795] Updated weights for policy 0, policy_version 140552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:58,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42871.4, 300 sec: 42765.0). Total num frames: 1151451136. Throughput: 0: 10524.5. Samples: 37835646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:19:58,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:19:59,581][626795] Updated weights for policy 0, policy_version 140562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:01,319][626795] Updated weights for policy 0, policy_version 140572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:03,021][626795] Updated weights for policy 0, policy_version 140582 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:03,975][24592] Fps is (10 sec: 48332.8, 60 sec: 43008.6, 300 sec: 42820.6). Total num frames: 1151688704. Throughput: 0: 11128.8. Samples: 37908096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:03,976][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:04,719][626795] Updated weights for policy 0, policy_version 140592 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:06,502][626795] Updated weights for policy 0, policy_version 140602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:08,089][626795] Updated weights for policy 0, policy_version 140612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:08,975][24592] Fps is (10 sec: 47513.6, 60 sec: 43144.6, 300 sec: 43237.1). Total num frames: 1151926272. Throughput: 0: 11137.9. Samples: 37979844. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:08,977][24592] Avg episode reward: [(0, '4.379')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:09,883][626795] Updated weights for policy 0, policy_version 140622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:11,697][626795] Updated weights for policy 0, policy_version 140632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:13,613][626795] Updated weights for policy 0, policy_version 140642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:13,975][24592] Fps is (10 sec: 46693.9, 60 sec: 44874.0, 300 sec: 43320.4). Total num frames: 1152155648. Throughput: 0: 11093.1. Samples: 38014290. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:13,978][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:15,327][626795] Updated weights for policy 0, policy_version 140652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:17,201][626795] Updated weights for policy 0, policy_version 140662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:18,976][24592] Fps is (10 sec: 45054.0, 60 sec: 44919.2, 300 sec: 43264.8). Total num frames: 1152376832. Throughput: 0: 10963.7. Samples: 38081256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:18,978][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:19,128][626795] Updated weights for policy 0, policy_version 140672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:23,151][626795] Updated weights for policy 0, policy_version 140682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:23,975][24592] Fps is (10 sec: 34406.5, 60 sec: 43008.0, 300 sec: 42876.1). Total num frames: 1152499712. Throughput: 0: 10378.5. Samples: 38122914. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:23,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:24,963][626795] Updated weights for policy 0, policy_version 140692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:26,840][626795] Updated weights for policy 0, policy_version 140702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:28,478][626795] Updated weights for policy 0, policy_version 140712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:28,975][24592] Fps is (10 sec: 35227.2, 60 sec: 42735.0, 300 sec: 42848.4). Total num frames: 1152729088. Throughput: 0: 10385.9. Samples: 38156520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:28,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:30,241][626795] Updated weights for policy 0, policy_version 140722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:31,957][626795] Updated weights for policy 0, policy_version 140732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:33,627][626795] Updated weights for policy 0, policy_version 140742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:33,976][24592] Fps is (10 sec: 47510.8, 60 sec: 42871.0, 300 sec: 42959.3). Total num frames: 1152974848. Throughput: 0: 10791.6. Samples: 38228304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:33,978][24592] Avg episode reward: [(0, '4.277')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:35,404][626795] Updated weights for policy 0, policy_version 140752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:37,009][626795] Updated weights for policy 0, policy_version 140762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:38,653][626795] Updated weights for policy 0, policy_version 140772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:38,976][24592] Fps is (10 sec: 48331.5, 60 sec: 43007.9, 300 sec: 43014.9). Total num frames: 1153212416. Throughput: 0: 11158.7. Samples: 38301594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:38,977][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:40,394][626795] Updated weights for policy 0, policy_version 140782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:42,031][626795] Updated weights for policy 0, policy_version 140792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:43,798][626795] Updated weights for policy 0, policy_version 140802 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:43,975][24592] Fps is (10 sec: 48335.4, 60 sec: 43417.8, 300 sec: 43487.0). Total num frames: 1153458176. Throughput: 0: 11150.9. Samples: 38337438. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:43,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:45,582][626795] Updated weights for policy 0, policy_version 140812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:47,459][626795] Updated weights for policy 0, policy_version 140822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:48,975][24592] Fps is (10 sec: 46695.8, 60 sec: 44919.5, 300 sec: 43514.8). Total num frames: 1153679360. Throughput: 0: 11069.3. Samples: 38406216. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:48,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:49,259][626795] Updated weights for policy 0, policy_version 140832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:51,232][626795] Updated weights for policy 0, policy_version 140842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:53,187][626795] Updated weights for policy 0, policy_version 140852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:53,976][24592] Fps is (10 sec: 43417.2, 60 sec: 44782.8, 300 sec: 43459.2). Total num frames: 1153892352. Throughput: 0: 10902.1. Samples: 38470440. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:53,978][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:57,221][626795] Updated weights for policy 0, policy_version 140862 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:58,976][24592] Fps is (10 sec: 33586.3, 60 sec: 42734.8, 300 sec: 43098.2). Total num frames: 1154015232. Throughput: 0: 10477.3. Samples: 38485770. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:20:58,977][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:20:59,014][626795] Updated weights for policy 0, policy_version 140872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:00,830][626795] Updated weights for policy 0, policy_version 140882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:02,614][626795] Updated weights for policy 0, policy_version 140892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:03,975][24592] Fps is (10 sec: 35226.1, 60 sec: 42598.4, 300 sec: 43070.5). Total num frames: 1154244608. Throughput: 0: 10370.8. Samples: 38547936. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:03,976][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:04,013][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000140900_1154252800.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000139628_1143832576.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:04,364][626795] Updated weights for policy 0, policy_version 140902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:06,081][626795] Updated weights for policy 0, policy_version 140912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:07,866][626795] Updated weights for policy 0, policy_version 140922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:08,975][24592] Fps is (10 sec: 46695.5, 60 sec: 42598.4, 300 sec: 43126.1). Total num frames: 1154482176. Throughput: 0: 11028.4. Samples: 38619192. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:08,976][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:09,515][626795] Updated weights for policy 0, policy_version 140932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:11,183][626795] Updated weights for policy 0, policy_version 140942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:12,891][626795] Updated weights for policy 0, policy_version 140952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:13,976][24592] Fps is (10 sec: 48330.4, 60 sec: 42871.1, 300 sec: 43181.5). Total num frames: 1154727936. Throughput: 0: 11081.6. Samples: 38655198. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:13,977][24592] Avg episode reward: [(0, '4.295')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:14,645][626795] Updated weights for policy 0, policy_version 140962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:16,307][626795] Updated weights for policy 0, policy_version 140972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:18,127][626795] Updated weights for policy 0, policy_version 140982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:18,975][24592] Fps is (10 sec: 48332.4, 60 sec: 43144.8, 300 sec: 43653.6). Total num frames: 1154965504. Throughput: 0: 11091.7. Samples: 38727426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:18,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:19,868][626795] Updated weights for policy 0, policy_version 140992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:21,630][626795] Updated weights for policy 0, policy_version 141002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:23,403][626795] Updated weights for policy 0, policy_version 141012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:23,975][24592] Fps is (10 sec: 45877.8, 60 sec: 44783.0, 300 sec: 43625.9). Total num frames: 1155186688. Throughput: 0: 10977.1. Samples: 38795562. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:23,976][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:25,321][626795] Updated weights for policy 0, policy_version 141022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:27,304][626795] Updated weights for policy 0, policy_version 141032 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:28,976][24592] Fps is (10 sec: 44235.9, 60 sec: 44646.2, 300 sec: 43570.3). Total num frames: 1155407872. Throughput: 0: 10922.9. Samples: 38828970. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:28,976][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:29,149][626795] Updated weights for policy 0, policy_version 141042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:33,215][626795] Updated weights for policy 0, policy_version 141052 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:33,975][24592] Fps is (10 sec: 33586.9, 60 sec: 42462.3, 300 sec: 43126.0). Total num frames: 1155522560. Throughput: 0: 10288.4. Samples: 38869194. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:33,977][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:35,142][626795] Updated weights for policy 0, policy_version 141062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:36,746][626795] Updated weights for policy 0, policy_version 141072 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:38,417][626795] Updated weights for policy 0, policy_version 141082 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:38,976][24592] Fps is (10 sec: 35225.5, 60 sec: 42461.8, 300 sec: 43153.7). Total num frames: 1155760128. Throughput: 0: 10405.2. Samples: 38938674. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:38,979][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:40,217][626795] Updated weights for policy 0, policy_version 141092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:41,932][626795] Updated weights for policy 0, policy_version 141102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:43,591][626795] Updated weights for policy 0, policy_version 141112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:43,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42461.9, 300 sec: 43209.3). Total num frames: 1156005888. Throughput: 0: 10857.8. Samples: 38974368. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:43,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:45,288][626795] Updated weights for policy 0, policy_version 141122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:47,028][626795] Updated weights for policy 0, policy_version 141132 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:48,738][626795] Updated weights for policy 0, policy_version 141142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:48,975][24592] Fps is (10 sec: 48334.0, 60 sec: 42734.9, 300 sec: 43266.9). Total num frames: 1156243456. Throughput: 0: 11091.2. Samples: 39047040. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:48,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:50,282][626795] Updated weights for policy 0, policy_version 141152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:52,050][626795] Updated weights for policy 0, policy_version 141162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:53,959][626795] Updated weights for policy 0, policy_version 141172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:53,975][24592] Fps is (10 sec: 47513.2, 60 sec: 43144.6, 300 sec: 43681.5). Total num frames: 1156481024. Throughput: 0: 11083.1. Samples: 39117930. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:53,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:55,748][626795] Updated weights for policy 0, policy_version 141182 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:57,548][626795] Updated weights for policy 0, policy_version 141192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:58,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44783.1, 300 sec: 43653.6). Total num frames: 1156702208. Throughput: 0: 11038.5. Samples: 39151926. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:21:58,981][24592] Avg episode reward: [(0, '4.372')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:21:59,291][626795] Updated weights for policy 0, policy_version 141202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:01,333][626795] Updated weights for policy 0, policy_version 141212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:03,343][626795] Updated weights for policy 0, policy_version 141222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:05,814][24592] Fps is (10 sec: 35981.2, 60 sec: 43053.7, 300 sec: 43245.2). Total num frames: 1156907008. Throughput: 0: 10451.3. Samples: 39216954. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:05,816][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:07,256][626795] Updated weights for policy 0, policy_version 141232 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:08,976][24592] Fps is (10 sec: 34403.8, 60 sec: 42734.4, 300 sec: 43181.4). Total num frames: 1157046272. Throughput: 0: 10308.5. Samples: 39259452. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:08,978][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:08,986][626795] Updated weights for policy 0, policy_version 141242 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:10,823][626795] Updated weights for policy 0, policy_version 141252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:12,554][626795] Updated weights for policy 0, policy_version 141262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:13,975][24592] Fps is (10 sec: 46175.2, 60 sec: 42598.8, 300 sec: 43181.6). Total num frames: 1157283840. Throughput: 0: 10345.3. Samples: 39294504. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:13,978][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:14,117][626795] Updated weights for policy 0, policy_version 141272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:15,879][626795] Updated weights for policy 0, policy_version 141282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:17,613][626795] Updated weights for policy 0, policy_version 141292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:18,976][24592] Fps is (10 sec: 48334.8, 60 sec: 42734.7, 300 sec: 43209.3). Total num frames: 1157529600. Throughput: 0: 11067.4. Samples: 39367230. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:18,977][24592] Avg episode reward: [(0, '4.305')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:19,225][626795] Updated weights for policy 0, policy_version 141302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:20,964][626795] Updated weights for policy 0, policy_version 141312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:22,662][626795] Updated weights for policy 0, policy_version 141322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 43008.0, 300 sec: 43627.1). Total num frames: 1157767168. Throughput: 0: 11150.7. Samples: 39440454. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:23,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:24,395][626795] Updated weights for policy 0, policy_version 141332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:26,097][626795] Updated weights for policy 0, policy_version 141342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:27,830][626795] Updated weights for policy 0, policy_version 141352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:28,975][24592] Fps is (10 sec: 46695.6, 60 sec: 43144.7, 300 sec: 43681.4). Total num frames: 1157996544. Throughput: 0: 11144.6. Samples: 39475878. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:28,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:29,630][626795] Updated weights for policy 0, policy_version 141362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:31,432][626795] Updated weights for policy 0, policy_version 141372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:33,306][626795] Updated weights for policy 0, policy_version 141382 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:33,975][24592] Fps is (10 sec: 45875.0, 60 sec: 45056.0, 300 sec: 43653.6). Total num frames: 1158225920. Throughput: 0: 11036.5. Samples: 39543684. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:33,977][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:35,104][626795] Updated weights for policy 0, policy_version 141392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:37,139][626795] Updated weights for policy 0, policy_version 141402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:40,355][24592] Fps is (10 sec: 35993.9, 60 sec: 43109.1, 300 sec: 43257.0). Total num frames: 1158406144. Throughput: 0: 9900.3. Samples: 39577104. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:40,357][24592] Avg episode reward: [(0, '4.312')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:41,193][626795] Updated weights for policy 0, policy_version 141412 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:43,118][626795] Updated weights for policy 0, policy_version 141422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:43,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42734.9, 300 sec: 43181.6). Total num frames: 1158569984. Throughput: 0: 10337.2. Samples: 39617100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:43,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:44,769][626795] Updated weights for policy 0, policy_version 141432 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:46,518][626795] Updated weights for policy 0, policy_version 141442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:48,191][626795] Updated weights for policy 0, policy_version 141452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:48,975][24592] Fps is (10 sec: 46565.9, 60 sec: 42735.0, 300 sec: 43181.6). Total num frames: 1158807552. Throughput: 0: 10906.7. Samples: 39687696. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:48,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:49,874][626795] Updated weights for policy 0, policy_version 141462 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:51,559][626795] Updated weights for policy 0, policy_version 141472 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:53,280][626795] Updated weights for policy 0, policy_version 141482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:53,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42734.9, 300 sec: 43209.3). Total num frames: 1159045120. Throughput: 0: 11128.0. Samples: 39760206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:53,978][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:55,105][626795] Updated weights for policy 0, policy_version 141492 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:56,863][626795] Updated weights for policy 0, policy_version 141502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:22:58,448][626795] Updated weights for policy 0, policy_version 141512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:58,975][24592] Fps is (10 sec: 47513.8, 60 sec: 43008.0, 300 sec: 43612.7). Total num frames: 1159282688. Throughput: 0: 11122.5. Samples: 39795018. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:22:58,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:00,291][626795] Updated weights for policy 0, policy_version 141522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:01,960][626795] Updated weights for policy 0, policy_version 141532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:03,823][626795] Updated weights for policy 0, policy_version 141542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:03,976][24592] Fps is (10 sec: 46692.9, 60 sec: 44790.2, 300 sec: 43653.7). Total num frames: 1159512064. Throughput: 0: 11072.1. Samples: 39865476. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:03,977][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:04,008][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000141543_1159520256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000140273_1149116416.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:05,580][626795] Updated weights for policy 0, policy_version 141552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:07,400][626795] Updated weights for policy 0, policy_version 141562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:08,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44920.1, 300 sec: 43653.7). Total num frames: 1159741440. Throughput: 0: 10957.1. Samples: 39933522. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:08,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:09,234][626795] Updated weights for policy 0, policy_version 141572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:11,259][626795] Updated weights for policy 0, policy_version 141582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:14,776][24592] Fps is (10 sec: 34892.5, 60 sec: 42846.2, 300 sec: 43203.3). Total num frames: 1159888896. Throughput: 0: 10685.1. Samples: 39965256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:14,778][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:15,314][626795] Updated weights for policy 0, policy_version 141592 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:17,218][626795] Updated weights for policy 0, policy_version 141602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:18,914][626795] Updated weights for policy 0, policy_version 141612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:18,976][24592] Fps is (10 sec: 34405.2, 60 sec: 42598.4, 300 sec: 43181.5). Total num frames: 1160085504. Throughput: 0: 10310.3. Samples: 40007652. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:18,977][24592] Avg episode reward: [(0, '4.316')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:20,566][626795] Updated weights for policy 0, policy_version 141622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:22,322][626795] Updated weights for policy 0, policy_version 141632 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:23,972][626795] Updated weights for policy 0, policy_version 141642 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:23,975][24592] Fps is (10 sec: 48084.3, 60 sec: 42734.9, 300 sec: 43181.5). Total num frames: 1160331264. Throughput: 0: 11516.0. Samples: 40079436. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:23,976][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:25,693][626795] Updated weights for policy 0, policy_version 141652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:27,385][626795] Updated weights for policy 0, policy_version 141662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:28,976][24592] Fps is (10 sec: 48332.8, 60 sec: 42871.3, 300 sec: 43237.1). Total num frames: 1160568832. Throughput: 0: 11081.3. Samples: 40115760. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:28,976][24592] Avg episode reward: [(0, '4.240')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:29,089][626795] Updated weights for policy 0, policy_version 141672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:30,809][626795] Updated weights for policy 0, policy_version 141682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:32,415][626795] Updated weights for policy 0, policy_version 141692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:33,976][24592] Fps is (10 sec: 47510.7, 60 sec: 43007.5, 300 sec: 43632.5). Total num frames: 1160806400. Throughput: 0: 11119.6. Samples: 40188084. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:33,977][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:34,274][626795] Updated weights for policy 0, policy_version 141702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:35,909][626795] Updated weights for policy 0, policy_version 141712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:37,719][626795] Updated weights for policy 0, policy_version 141722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:38,976][24592] Fps is (10 sec: 46693.0, 60 sec: 44858.4, 300 sec: 43653.6). Total num frames: 1161035776. Throughput: 0: 11050.8. Samples: 40257498. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:38,978][24592] Avg episode reward: [(0, '4.212')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:39,563][626795] Updated weights for policy 0, policy_version 141732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:41,397][626795] Updated weights for policy 0, policy_version 141742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:43,187][626795] Updated weights for policy 0, policy_version 141752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:43,976][24592] Fps is (10 sec: 45058.1, 60 sec: 44782.8, 300 sec: 43598.1). Total num frames: 1161256960. Throughput: 0: 11041.0. Samples: 40291866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:43,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:45,142][626795] Updated weights for policy 0, policy_version 141762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:49,241][24592] Fps is (10 sec: 34317.3, 60 sec: 42818.7, 300 sec: 43170.5). Total num frames: 1161388032. Throughput: 0: 10131.6. Samples: 40324080. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:49,242][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:49,378][626795] Updated weights for policy 0, policy_version 141772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:51,191][626795] Updated weights for policy 0, policy_version 141782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:53,040][626795] Updated weights for policy 0, policy_version 141792 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:53,985][24592] Fps is (10 sec: 34373.2, 60 sec: 42591.5, 300 sec: 43124.6). Total num frames: 1161601024. Throughput: 0: 10328.4. Samples: 40398402. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:53,986][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:54,725][626795] Updated weights for policy 0, policy_version 141802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:55,863][626772] Signal inference workers to stop experience collection... (600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:55,868][626772] Signal inference workers to resume experience collection... (600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:55,894][626795] InferenceWorker_p0-w0: stopping experience collection (600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:55,895][626795] InferenceWorker_p0-w0: resuming experience collection (600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:56,451][626795] Updated weights for policy 0, policy_version 141812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:58,121][626795] Updated weights for policy 0, policy_version 141822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:58,975][24592] Fps is (10 sec: 47125.3, 60 sec: 42734.9, 300 sec: 43181.7). Total num frames: 1161846784. Throughput: 0: 10607.0. Samples: 40434084. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:23:58,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:23:59,779][626795] Updated weights for policy 0, policy_version 141832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:01,447][626795] Updated weights for policy 0, policy_version 141842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:03,239][626795] Updated weights for policy 0, policy_version 141852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:03,975][24592] Fps is (10 sec: 48380.8, 60 sec: 42871.8, 300 sec: 43209.3). Total num frames: 1162084352. Throughput: 0: 11095.0. Samples: 40506924. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:03,976][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:04,903][626795] Updated weights for policy 0, policy_version 141862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:06,614][626795] Updated weights for policy 0, policy_version 141872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:08,339][626795] Updated weights for policy 0, policy_version 141882 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:08,975][24592] Fps is (10 sec: 47513.4, 60 sec: 43007.9, 300 sec: 43587.5). Total num frames: 1162321920. Throughput: 0: 11103.3. Samples: 40579086. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:08,986][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:10,147][626795] Updated weights for policy 0, policy_version 141892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:11,961][626795] Updated weights for policy 0, policy_version 141902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:13,663][626795] Updated weights for policy 0, policy_version 141912 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:13,976][24592] Fps is (10 sec: 46693.7, 60 sec: 44973.1, 300 sec: 43625.9). Total num frames: 1162551296. Throughput: 0: 11054.7. Samples: 40613220. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:13,977][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:15,541][626795] Updated weights for policy 0, policy_version 141922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:17,316][626795] Updated weights for policy 0, policy_version 141932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:18,975][24592] Fps is (10 sec: 45055.6, 60 sec: 44783.0, 300 sec: 43570.3). Total num frames: 1162772480. Throughput: 0: 10953.2. Samples: 40680972. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:18,979][24592] Avg episode reward: [(0, '4.280')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:19,355][626795] Updated weights for policy 0, policy_version 141942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:21,204][626795] Updated weights for policy 0, policy_version 141952 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:23,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42735.0, 300 sec: 43153.8). Total num frames: 1162895360. Throughput: 0: 10318.1. Samples: 40721808. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:23,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:25,157][626795] Updated weights for policy 0, policy_version 141962 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:26,960][626795] Updated weights for policy 0, policy_version 141972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:28,564][626795] Updated weights for policy 0, policy_version 141982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:28,975][24592] Fps is (10 sec: 35226.0, 60 sec: 42598.6, 300 sec: 43126.0). Total num frames: 1163124736. Throughput: 0: 10305.0. Samples: 40755588. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:28,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:30,279][626795] Updated weights for policy 0, policy_version 141992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:32,165][626795] Updated weights for policy 0, policy_version 142002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:33,819][626795] Updated weights for policy 0, policy_version 142012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:33,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42598.8, 300 sec: 43153.8). Total num frames: 1163362304. Throughput: 0: 11256.1. Samples: 40827618. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:33,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:35,513][626795] Updated weights for policy 0, policy_version 142022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:37,214][626795] Updated weights for policy 0, policy_version 142032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:38,938][626795] Updated weights for policy 0, policy_version 142042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:38,976][24592] Fps is (10 sec: 48331.7, 60 sec: 42871.7, 300 sec: 43237.1). Total num frames: 1163608064. Throughput: 0: 11140.6. Samples: 40899624. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:38,977][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:40,518][626795] Updated weights for policy 0, policy_version 142052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:42,249][626795] Updated weights for policy 0, policy_version 142062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:43,975][24592] Fps is (10 sec: 48332.2, 60 sec: 43144.6, 300 sec: 43598.1). Total num frames: 1163845632. Throughput: 0: 11150.6. Samples: 40935864. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:43,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:44,083][626795] Updated weights for policy 0, policy_version 142072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:45,763][626795] Updated weights for policy 0, policy_version 142082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:47,799][626795] Updated weights for policy 0, policy_version 142092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:48,976][24592] Fps is (10 sec: 46694.1, 60 sec: 44981.6, 300 sec: 43625.8). Total num frames: 1164075008. Throughput: 0: 11070.6. Samples: 41005104. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:48,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:49,451][626795] Updated weights for policy 0, policy_version 142102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:51,322][626795] Updated weights for policy 0, policy_version 142112 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:53,243][626795] Updated weights for policy 0, policy_version 142122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:53,975][24592] Fps is (10 sec: 44237.2, 60 sec: 44790.2, 300 sec: 43514.8). Total num frames: 1164288000. Throughput: 0: 10921.2. Samples: 41070540. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:53,977][24592] Avg episode reward: [(0, '4.306')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:55,296][626795] Updated weights for policy 0, policy_version 142132 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:58,975][24592] Fps is (10 sec: 33588.4, 60 sec: 42734.9, 300 sec: 43126.0). Total num frames: 1164410880. Throughput: 0: 10679.6. Samples: 41093802. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:24:58,976][24592] Avg episode reward: [(0, '4.348')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:24:59,270][626795] Updated weights for policy 0, policy_version 142142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:01,090][626795] Updated weights for policy 0, policy_version 142152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:02,784][626795] Updated weights for policy 0, policy_version 142162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:03,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42598.3, 300 sec: 43098.2). Total num frames: 1164640256. Throughput: 0: 10345.5. Samples: 41146518. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:03,977][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000142168_1164640256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:04,044][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000140900_1154252800.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:04,546][626795] Updated weights for policy 0, policy_version 142172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:06,228][626795] Updated weights for policy 0, policy_version 142182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:07,939][626795] Updated weights for policy 0, policy_version 142192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42735.0, 300 sec: 43153.8). Total num frames: 1164886016. Throughput: 0: 11039.5. Samples: 41218584. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:08,977][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:09,620][626795] Updated weights for policy 0, policy_version 142202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:11,365][626795] Updated weights for policy 0, policy_version 142212 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:12,968][626795] Updated weights for policy 0, policy_version 142222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:13,976][24592] Fps is (10 sec: 48332.4, 60 sec: 42871.4, 300 sec: 43209.4). Total num frames: 1165123584. Throughput: 0: 11104.1. Samples: 41255274. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:13,978][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:14,738][626795] Updated weights for policy 0, policy_version 142232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:16,388][626795] Updated weights for policy 0, policy_version 142242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:18,133][626795] Updated weights for policy 0, policy_version 142252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:18,975][24592] Fps is (10 sec: 47513.4, 60 sec: 43144.6, 300 sec: 43598.1). Total num frames: 1165361152. Throughput: 0: 11100.0. Samples: 41327118. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:18,977][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:19,969][626795] Updated weights for policy 0, policy_version 142262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:21,862][626795] Updated weights for policy 0, policy_version 142272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:23,626][626795] Updated weights for policy 0, policy_version 142282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:23,975][24592] Fps is (10 sec: 45876.0, 60 sec: 44782.9, 300 sec: 43570.3). Total num frames: 1165582336. Throughput: 0: 11006.5. Samples: 41394912. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:23,977][24592] Avg episode reward: [(0, '4.335')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:25,389][626795] Updated weights for policy 0, policy_version 142292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:27,300][626795] Updated weights for policy 0, policy_version 142302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:28,975][24592] Fps is (10 sec: 44236.2, 60 sec: 44646.3, 300 sec: 43487.1). Total num frames: 1165803520. Throughput: 0: 10926.5. Samples: 41427558. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:28,976][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:29,315][626795] Updated weights for policy 0, policy_version 142312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:33,248][626795] Updated weights for policy 0, policy_version 142322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:33,975][24592] Fps is (10 sec: 35225.2, 60 sec: 42871.4, 300 sec: 43126.0). Total num frames: 1165934592. Throughput: 0: 10307.0. Samples: 41468916. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:33,977][24592] Avg episode reward: [(0, '4.391')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:35,147][626795] Updated weights for policy 0, policy_version 142332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:36,762][626795] Updated weights for policy 0, policy_version 142342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:38,435][626795] Updated weights for policy 0, policy_version 142352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:38,977][24592] Fps is (10 sec: 36860.3, 60 sec: 42734.3, 300 sec: 43098.1). Total num frames: 1166172160. Throughput: 0: 10424.3. Samples: 41539644. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:38,978][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:40,193][626795] Updated weights for policy 0, policy_version 142362 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:41,809][626795] Updated weights for policy 0, policy_version 142372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:43,584][626795] Updated weights for policy 0, policy_version 142382 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:43,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42735.0, 300 sec: 43153.8). Total num frames: 1166409728. Throughput: 0: 10706.4. Samples: 41575590. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:43,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:45,284][626795] Updated weights for policy 0, policy_version 142392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:46,924][626795] Updated weights for policy 0, policy_version 142402 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:48,667][626795] Updated weights for policy 0, policy_version 142412 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:48,975][24592] Fps is (10 sec: 47519.2, 60 sec: 42871.7, 300 sec: 43237.1). Total num frames: 1166647296. Throughput: 0: 11143.5. Samples: 41647974. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:48,977][24592] Avg episode reward: [(0, '4.375')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:50,383][626795] Updated weights for policy 0, policy_version 142422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:52,195][626795] Updated weights for policy 0, policy_version 142432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:53,977][24592] Fps is (10 sec: 46686.9, 60 sec: 43143.4, 300 sec: 43597.9). Total num frames: 1166876672. Throughput: 0: 11086.1. Samples: 41717478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:53,978][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:54,172][626795] Updated weights for policy 0, policy_version 142442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:55,860][626795] Updated weights for policy 0, policy_version 142452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:57,720][626795] Updated weights for policy 0, policy_version 142462 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:58,975][24592] Fps is (10 sec: 44236.4, 60 sec: 44646.3, 300 sec: 43542.6). Total num frames: 1167089664. Throughput: 0: 10999.5. Samples: 41750250. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:25:58,977][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:25:59,636][626795] Updated weights for policy 0, policy_version 142472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:01,225][626795] Updated weights for policy 0, policy_version 142482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:03,019][626795] Updated weights for policy 0, policy_version 142492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:03,975][24592] Fps is (10 sec: 45063.5, 60 sec: 44783.0, 300 sec: 43542.6). Total num frames: 1167327232. Throughput: 0: 10941.1. Samples: 41819466. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:03,976][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:07,348][626795] Updated weights for policy 0, policy_version 142502 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:08,975][24592] Fps is (10 sec: 36044.8, 60 sec: 42734.8, 300 sec: 43126.1). Total num frames: 1167450112. Throughput: 0: 10346.2. Samples: 41860494. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:08,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:09,050][626795] Updated weights for policy 0, policy_version 142512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:10,886][626795] Updated weights for policy 0, policy_version 142522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:12,597][626795] Updated weights for policy 0, policy_version 142532 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:13,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42598.5, 300 sec: 43098.3). Total num frames: 1167679488. Throughput: 0: 10385.0. Samples: 41894880. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:13,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:14,452][626795] Updated weights for policy 0, policy_version 142542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:16,331][626795] Updated weights for policy 0, policy_version 142552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:18,382][626795] Updated weights for policy 0, policy_version 142562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:18,976][24592] Fps is (10 sec: 44236.4, 60 sec: 42188.7, 300 sec: 43070.4). Total num frames: 1167892480. Throughput: 0: 10962.1. Samples: 41962212. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:18,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:20,289][626795] Updated weights for policy 0, policy_version 142572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:22,176][626795] Updated weights for policy 0, policy_version 142582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:23,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42052.2, 300 sec: 43042.7). Total num frames: 1168105472. Throughput: 0: 10788.5. Samples: 42025116. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:23,976][24592] Avg episode reward: [(0, '4.244')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:23,995][626795] Updated weights for policy 0, policy_version 142592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:25,884][626795] Updated weights for policy 0, policy_version 142602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:27,723][626795] Updated weights for policy 0, policy_version 142612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:28,975][24592] Fps is (10 sec: 43418.2, 60 sec: 42052.3, 300 sec: 43403.7). Total num frames: 1168326656. Throughput: 0: 10726.8. Samples: 42058296. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:28,978][24592] Avg episode reward: [(0, '4.412')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:29,494][626795] Updated weights for policy 0, policy_version 142622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:31,390][626795] Updated weights for policy 0, policy_version 142632 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:33,367][626795] Updated weights for policy 0, policy_version 142642 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:33,975][24592] Fps is (10 sec: 44237.0, 60 sec: 43554.2, 300 sec: 43348.2). Total num frames: 1168547840. Throughput: 0: 10598.0. Samples: 42124884. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:33,976][24592] Avg episode reward: [(0, '4.292')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:35,116][626795] Updated weights for policy 0, policy_version 142652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:36,976][626795] Updated weights for policy 0, policy_version 142662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:38,821][626795] Updated weights for policy 0, policy_version 142672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:38,976][24592] Fps is (10 sec: 45052.5, 60 sec: 43417.8, 300 sec: 43292.5). Total num frames: 1168777216. Throughput: 0: 10526.2. Samples: 42191148. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:38,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:42,626][626795] Updated weights for policy 0, policy_version 142682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:43,975][24592] Fps is (10 sec: 36044.8, 60 sec: 41642.7, 300 sec: 42931.6). Total num frames: 1168908288. Throughput: 0: 10078.0. Samples: 42203760. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:43,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:44,354][626795] Updated weights for policy 0, policy_version 142692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:46,232][626795] Updated weights for policy 0, policy_version 142702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:48,140][626795] Updated weights for policy 0, policy_version 142712 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:48,975][24592] Fps is (10 sec: 35228.2, 60 sec: 41369.5, 300 sec: 42876.1). Total num frames: 1169129472. Throughput: 0: 10006.6. Samples: 42269766. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:48,976][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:49,960][626795] Updated weights for policy 0, policy_version 142722 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:51,790][626795] Updated weights for policy 0, policy_version 142732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:53,654][626795] Updated weights for policy 0, policy_version 142742 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:53,975][24592] Fps is (10 sec: 44236.4, 60 sec: 41234.1, 300 sec: 42876.1). Total num frames: 1169350656. Throughput: 0: 10565.7. Samples: 42335952. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:53,977][24592] Avg episode reward: [(0, '4.308')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:55,513][626795] Updated weights for policy 0, policy_version 142752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:57,410][626795] Updated weights for policy 0, policy_version 142762 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:58,975][24592] Fps is (10 sec: 44236.7, 60 sec: 41369.6, 300 sec: 43200.9). Total num frames: 1169571840. Throughput: 0: 10540.4. Samples: 42369198. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:26:58,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:26:59,244][626795] Updated weights for policy 0, policy_version 142772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:01,138][626795] Updated weights for policy 0, policy_version 142782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:02,911][626795] Updated weights for policy 0, policy_version 142792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:03,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41096.5, 300 sec: 43209.4). Total num frames: 1169793024. Throughput: 0: 10512.3. Samples: 42435264. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:03,977][24592] Avg episode reward: [(0, '4.334')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000142797_1169793024.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:04,060][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000141543_1159520256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:04,851][626795] Updated weights for policy 0, policy_version 142802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:06,719][626795] Updated weights for policy 0, policy_version 142812 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:08,578][626795] Updated weights for policy 0, policy_version 142822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:08,975][24592] Fps is (10 sec: 44237.5, 60 sec: 42735.0, 300 sec: 43153.8). Total num frames: 1170014208. Throughput: 0: 10588.0. Samples: 42501576. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:08,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:10,366][626795] Updated weights for policy 0, policy_version 142832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:12,344][626795] Updated weights for policy 0, policy_version 142842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:15,272][24592] Fps is (10 sec: 36983.5, 60 sec: 41296.3, 300 sec: 42799.1). Total num frames: 1170210816. Throughput: 0: 10279.4. Samples: 42534198. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:15,273][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:16,028][626795] Updated weights for policy 0, policy_version 142852 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:17,789][626795] Updated weights for policy 0, policy_version 142862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:18,975][24592] Fps is (10 sec: 35225.5, 60 sec: 41233.2, 300 sec: 42709.5). Total num frames: 1170366464. Throughput: 0: 10122.3. Samples: 42580386. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:18,977][24592] Avg episode reward: [(0, '4.261')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:19,750][626795] Updated weights for policy 0, policy_version 142872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:21,468][626795] Updated weights for policy 0, policy_version 142882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:23,276][626795] Updated weights for policy 0, policy_version 142892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:23,975][24592] Fps is (10 sec: 44239.2, 60 sec: 41506.2, 300 sec: 42709.5). Total num frames: 1170595840. Throughput: 0: 10118.7. Samples: 42646482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:23,977][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:25,281][626795] Updated weights for policy 0, policy_version 142902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:27,211][626795] Updated weights for policy 0, policy_version 142912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:28,932][626795] Updated weights for policy 0, policy_version 142922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:28,975][24592] Fps is (10 sec: 45055.9, 60 sec: 41506.2, 300 sec: 42681.7). Total num frames: 1170817024. Throughput: 0: 10564.4. Samples: 42679158. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:28,977][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:30,890][626795] Updated weights for policy 0, policy_version 142932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:32,710][626795] Updated weights for policy 0, policy_version 142942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:33,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41506.2, 300 sec: 43021.8). Total num frames: 1171038208. Throughput: 0: 10573.4. Samples: 42745566. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:33,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:34,553][626795] Updated weights for policy 0, policy_version 142952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:36,353][626795] Updated weights for policy 0, policy_version 142962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:38,289][626795] Updated weights for policy 0, policy_version 142972 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:38,975][24592] Fps is (10 sec: 44236.5, 60 sec: 41370.1, 300 sec: 43014.9). Total num frames: 1171259392. Throughput: 0: 10578.5. Samples: 42811986. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:38,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:40,099][626795] Updated weights for policy 0, policy_version 142982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:42,028][626795] Updated weights for policy 0, policy_version 142992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:43,706][626795] Updated weights for policy 0, policy_version 143002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:43,975][24592] Fps is (10 sec: 44237.1, 60 sec: 42871.5, 300 sec: 42959.4). Total num frames: 1171480576. Throughput: 0: 10573.8. Samples: 42845016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:43,976][24592] Avg episode reward: [(0, '4.337')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:45,609][626795] Updated weights for policy 0, policy_version 143012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:47,357][626795] Updated weights for policy 0, policy_version 143022 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:49,422][24592] Fps is (10 sec: 36072.5, 60 sec: 41470.6, 300 sec: 42617.2). Total num frames: 1171636224. Throughput: 0: 9757.6. Samples: 42878712. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:49,423][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:51,233][626795] Updated weights for policy 0, policy_version 143032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:52,930][626795] Updated weights for policy 0, policy_version 143042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:53,975][24592] Fps is (10 sec: 36044.5, 60 sec: 41506.2, 300 sec: 42570.6). Total num frames: 1171841024. Throughput: 0: 10134.4. Samples: 42957624. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:53,977][24592] Avg episode reward: [(0, '4.339')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:54,970][626795] Updated weights for policy 0, policy_version 143052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:56,797][626795] Updated weights for policy 0, policy_version 143062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:27:58,553][626795] Updated weights for policy 0, policy_version 143072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:58,975][24592] Fps is (10 sec: 44589.7, 60 sec: 41506.2, 300 sec: 42542.9). Total num frames: 1172062208. Throughput: 0: 10437.4. Samples: 42990348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:27:58,976][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:00,442][626795] Updated weights for policy 0, policy_version 143082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:02,374][626795] Updated weights for policy 0, policy_version 143092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:03,975][24592] Fps is (10 sec: 44236.7, 60 sec: 41506.2, 300 sec: 42515.1). Total num frames: 1172283392. Throughput: 0: 10589.6. Samples: 43056918. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:03,978][24592] Avg episode reward: [(0, '4.333')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:04,151][626795] Updated weights for policy 0, policy_version 143102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:06,025][626795] Updated weights for policy 0, policy_version 143112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:07,784][626795] Updated weights for policy 0, policy_version 143122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:08,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41369.6, 300 sec: 42853.5). Total num frames: 1172496384. Throughput: 0: 10606.1. Samples: 43123758. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:08,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:09,704][626795] Updated weights for policy 0, policy_version 143132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:11,534][626795] Updated weights for policy 0, policy_version 143142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:13,406][626795] Updated weights for policy 0, policy_version 143152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:13,975][24592] Fps is (10 sec: 44236.8, 60 sec: 42841.7, 300 sec: 42848.4). Total num frames: 1172725760. Throughput: 0: 10620.8. Samples: 43157094. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:13,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:15,184][626795] Updated weights for policy 0, policy_version 143162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:17,167][626795] Updated weights for policy 0, policy_version 143172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:17,323][626772] Signal inference workers to stop experience collection... (650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:17,326][626772] Signal inference workers to resume experience collection... (650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:17,339][626795] InferenceWorker_p0-w0: stopping experience collection (650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:17,346][626795] InferenceWorker_p0-w0: resuming experience collection (650 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:18,841][626795] Updated weights for policy 0, policy_version 143182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:18,975][24592] Fps is (10 sec: 45055.9, 60 sec: 43008.0, 300 sec: 42765.0). Total num frames: 1172946944. Throughput: 0: 10618.1. Samples: 43223382. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:18,976][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:20,670][626795] Updated weights for policy 0, policy_version 143192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:23,975][24592] Fps is (10 sec: 36044.8, 60 sec: 41506.1, 300 sec: 42431.8). Total num frames: 1173086208. Throughput: 0: 10166.3. Samples: 43269468. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:23,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:24,507][626795] Updated weights for policy 0, policy_version 143202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:26,304][626795] Updated weights for policy 0, policy_version 143212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:28,149][626795] Updated weights for policy 0, policy_version 143222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:28,976][24592] Fps is (10 sec: 36043.9, 60 sec: 41506.0, 300 sec: 42376.3). Total num frames: 1173307392. Throughput: 0: 10170.2. Samples: 43302678. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:28,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:29,988][626795] Updated weights for policy 0, policy_version 143232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:31,902][626795] Updated weights for policy 0, policy_version 143242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:33,689][626795] Updated weights for policy 0, policy_version 143252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:33,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41506.1, 300 sec: 42348.6). Total num frames: 1173528576. Throughput: 0: 11005.8. Samples: 43369056. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:33,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:35,514][626795] Updated weights for policy 0, policy_version 143262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:37,393][626795] Updated weights for policy 0, policy_version 143272 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:38,975][24592] Fps is (10 sec: 45056.6, 60 sec: 41642.6, 300 sec: 42376.3). Total num frames: 1173757952. Throughput: 0: 10626.0. Samples: 43435794. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:38,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:39,305][626795] Updated weights for policy 0, policy_version 143282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:41,002][626795] Updated weights for policy 0, policy_version 143292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:42,977][626795] Updated weights for policy 0, policy_version 143302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:43,975][24592] Fps is (10 sec: 44236.5, 60 sec: 41506.0, 300 sec: 42692.3). Total num frames: 1173970944. Throughput: 0: 10635.6. Samples: 43468950. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:43,978][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:44,791][626795] Updated weights for policy 0, policy_version 143312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:46,681][626795] Updated weights for policy 0, policy_version 143322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:48,365][626795] Updated weights for policy 0, policy_version 143332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:48,975][24592] Fps is (10 sec: 44237.2, 60 sec: 43055.4, 300 sec: 42710.9). Total num frames: 1174200320. Throughput: 0: 10650.7. Samples: 43536198. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:48,976][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:50,203][626795] Updated weights for policy 0, policy_version 143342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:52,075][626795] Updated weights for policy 0, policy_version 143352 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:53,940][626795] Updated weights for policy 0, policy_version 143362 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:53,975][24592] Fps is (10 sec: 45056.1, 60 sec: 43008.0, 300 sec: 42626.2). Total num frames: 1174421504. Throughput: 0: 10648.0. Samples: 43602918. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:53,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:55,761][626795] Updated weights for policy 0, policy_version 143372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:58,975][24592] Fps is (10 sec: 35225.8, 60 sec: 41506.2, 300 sec: 42265.2). Total num frames: 1174552576. Throughput: 0: 10459.5. Samples: 43627770. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:28:58,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:28:59,645][626795] Updated weights for policy 0, policy_version 143382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:01,453][626795] Updated weights for policy 0, policy_version 143392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:03,272][626795] Updated weights for policy 0, policy_version 143402 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:03,975][24592] Fps is (10 sec: 35225.6, 60 sec: 41506.1, 300 sec: 42209.6). Total num frames: 1174773760. Throughput: 0: 10170.4. Samples: 43681050. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:03,976][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:04,019][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000143406_1174781952.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:04,085][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000142168_1164640256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:05,132][626795] Updated weights for policy 0, policy_version 143412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:07,140][626795] Updated weights for policy 0, policy_version 143422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:08,864][626795] Updated weights for policy 0, policy_version 143432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:08,975][24592] Fps is (10 sec: 44236.4, 60 sec: 41642.6, 300 sec: 42181.9). Total num frames: 1174994944. Throughput: 0: 10609.1. Samples: 43746876. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:08,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:10,774][626795] Updated weights for policy 0, policy_version 143442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:12,524][626795] Updated weights for policy 0, policy_version 143452 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:13,976][24592] Fps is (10 sec: 44236.4, 60 sec: 41506.1, 300 sec: 42181.9). Total num frames: 1175216128. Throughput: 0: 10609.5. Samples: 43780104. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:13,977][24592] Avg episode reward: [(0, '4.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:14,370][626795] Updated weights for policy 0, policy_version 143462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:16,308][626795] Updated weights for policy 0, policy_version 143472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:18,051][626795] Updated weights for policy 0, policy_version 143482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:18,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41506.1, 300 sec: 42515.1). Total num frames: 1175437312. Throughput: 0: 10624.5. Samples: 43847160. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:18,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:19,922][626795] Updated weights for policy 0, policy_version 143492 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:21,781][626795] Updated weights for policy 0, policy_version 143502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:23,662][626795] Updated weights for policy 0, policy_version 143512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:23,976][24592] Fps is (10 sec: 44236.7, 60 sec: 42871.4, 300 sec: 42487.3). Total num frames: 1175658496. Throughput: 0: 10622.3. Samples: 43913796. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:23,976][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:25,428][626795] Updated weights for policy 0, policy_version 143522 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:27,335][626795] Updated weights for policy 0, policy_version 143532 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:28,976][24592] Fps is (10 sec: 44235.4, 60 sec: 42871.4, 300 sec: 42431.7). Total num frames: 1175879680. Throughput: 0: 10620.1. Samples: 43946856. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:28,977][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:29,162][626795] Updated weights for policy 0, policy_version 143542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:32,935][626795] Updated weights for policy 0, policy_version 143552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:33,975][24592] Fps is (10 sec: 36045.2, 60 sec: 41506.1, 300 sec: 42070.8). Total num frames: 1176018944. Throughput: 0: 10112.7. Samples: 43991268. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:33,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:34,819][626795] Updated weights for policy 0, policy_version 143562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:36,697][626795] Updated weights for policy 0, policy_version 143572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:38,426][626795] Updated weights for policy 0, policy_version 143582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:38,975][24592] Fps is (10 sec: 36046.1, 60 sec: 41369.7, 300 sec: 42015.3). Total num frames: 1176240128. Throughput: 0: 10120.3. Samples: 44058330. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:38,976][24592] Avg episode reward: [(0, '4.399')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:40,411][626795] Updated weights for policy 0, policy_version 143592 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:42,239][626795] Updated weights for policy 0, policy_version 143602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:43,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41506.2, 300 sec: 41987.5). Total num frames: 1176461312. Throughput: 0: 10291.5. Samples: 44090886. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:43,977][24592] Avg episode reward: [(0, '4.384')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:44,032][626795] Updated weights for policy 0, policy_version 143612 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:45,799][626795] Updated weights for policy 0, policy_version 143622 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:47,800][626795] Updated weights for policy 0, policy_version 143632 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:48,975][24592] Fps is (10 sec: 44236.4, 60 sec: 41369.6, 300 sec: 42015.2). Total num frames: 1176682496. Throughput: 0: 10609.9. Samples: 44158494. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:48,976][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:49,565][626795] Updated weights for policy 0, policy_version 143642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:51,376][626795] Updated weights for policy 0, policy_version 143652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:53,197][626795] Updated weights for policy 0, policy_version 143662 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:53,975][24592] Fps is (10 sec: 44236.6, 60 sec: 41369.6, 300 sec: 42348.5). Total num frames: 1176903680. Throughput: 0: 10629.9. Samples: 44225220. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:53,978][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:55,070][626795] Updated weights for policy 0, policy_version 143672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:56,923][626795] Updated weights for policy 0, policy_version 143682 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:29:58,813][626795] Updated weights for policy 0, policy_version 143692 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:58,976][24592] Fps is (10 sec: 44234.5, 60 sec: 42871.0, 300 sec: 42320.6). Total num frames: 1177124864. Throughput: 0: 10620.3. Samples: 44258022. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:29:58,978][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:00,641][626795] Updated weights for policy 0, policy_version 143702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:02,563][626795] Updated weights for policy 0, policy_version 143712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 43007.9, 300 sec: 42265.1). Total num frames: 1177354240. Throughput: 0: 10603.4. Samples: 44324316. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:03,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:06,397][626795] Updated weights for policy 0, policy_version 143722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:08,135][626795] Updated weights for policy 0, policy_version 143732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:08,976][24592] Fps is (10 sec: 36043.9, 60 sec: 41505.6, 300 sec: 41904.1). Total num frames: 1177485312. Throughput: 0: 10103.3. Samples: 44368452. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:08,978][24592] Avg episode reward: [(0, '4.326')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:10,147][626795] Updated weights for policy 0, policy_version 143742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:12,128][626795] Updated weights for policy 0, policy_version 143752 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:13,761][626795] Updated weights for policy 0, policy_version 143762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:13,975][24592] Fps is (10 sec: 34406.5, 60 sec: 41369.6, 300 sec: 41820.8). Total num frames: 1177698304. Throughput: 0: 10086.9. Samples: 44400762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:13,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:15,707][626795] Updated weights for policy 0, policy_version 143772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:17,655][626795] Updated weights for policy 0, policy_version 143782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:18,976][24592] Fps is (10 sec: 43419.4, 60 sec: 41369.3, 300 sec: 41820.8). Total num frames: 1177919488. Throughput: 0: 10576.7. Samples: 44467224. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:18,978][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:19,376][626795] Updated weights for policy 0, policy_version 143792 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:21,347][626795] Updated weights for policy 0, policy_version 143802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:23,203][626795] Updated weights for policy 0, policy_version 143812 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:23,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41369.7, 300 sec: 41820.9). Total num frames: 1178140672. Throughput: 0: 10543.2. Samples: 44532774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:23,977][24592] Avg episode reward: [(0, '4.384')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:25,007][626795] Updated weights for policy 0, policy_version 143822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:26,819][626795] Updated weights for policy 0, policy_version 143832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:28,820][626795] Updated weights for policy 0, policy_version 143842 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:28,976][24592] Fps is (10 sec: 44237.0, 60 sec: 41369.6, 300 sec: 42126.3). Total num frames: 1178361856. Throughput: 0: 10570.2. Samples: 44566548. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:28,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:30,579][626795] Updated weights for policy 0, policy_version 143852 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:32,430][626795] Updated weights for policy 0, policy_version 143862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:33,975][24592] Fps is (10 sec: 44236.9, 60 sec: 42734.9, 300 sec: 42070.9). Total num frames: 1178583040. Throughput: 0: 10527.1. Samples: 44632212. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:33,976][24592] Avg episode reward: [(0, '4.372')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:34,314][626795] Updated weights for policy 0, policy_version 143872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:36,166][626795] Updated weights for policy 0, policy_version 143882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:38,008][626795] Updated weights for policy 0, policy_version 143892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:40,521][24592] Fps is (10 sec: 36187.3, 60 sec: 41262.4, 300 sec: 41713.4). Total num frames: 1178779648. Throughput: 0: 9455.0. Samples: 44665308. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:40,522][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:41,777][626795] Updated weights for policy 0, policy_version 143902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:43,712][626795] Updated weights for policy 0, policy_version 143912 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:43,976][24592] Fps is (10 sec: 35223.5, 60 sec: 41232.6, 300 sec: 41654.1). Total num frames: 1178935296. Throughput: 0: 10049.6. Samples: 44710254. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:43,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:45,583][626795] Updated weights for policy 0, policy_version 143922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:47,402][626795] Updated weights for policy 0, policy_version 143932 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:48,975][24592] Fps is (10 sec: 44571.7, 60 sec: 41233.0, 300 sec: 41626.7). Total num frames: 1179156480. Throughput: 0: 10029.2. Samples: 44775630. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:48,978][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:49,279][626795] Updated weights for policy 0, policy_version 143942 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:51,214][626795] Updated weights for policy 0, policy_version 143952 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:53,030][626795] Updated weights for policy 0, policy_version 143962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:53,976][24592] Fps is (10 sec: 43418.1, 60 sec: 41096.2, 300 sec: 41626.4). Total num frames: 1179369472. Throughput: 0: 10510.1. Samples: 44841402. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:53,977][24592] Avg episode reward: [(0, '4.256')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:54,896][626795] Updated weights for policy 0, policy_version 143972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:56,674][626795] Updated weights for policy 0, policy_version 143982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:30:58,575][626795] Updated weights for policy 0, policy_version 143992 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:58,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41096.9, 300 sec: 41570.9). Total num frames: 1179590656. Throughput: 0: 10537.2. Samples: 44874936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:30:58,976][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:00,536][626795] Updated weights for policy 0, policy_version 144002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:02,280][626795] Updated weights for policy 0, policy_version 144012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:03,975][24592] Fps is (10 sec: 45058.1, 60 sec: 41096.6, 300 sec: 41931.9). Total num frames: 1179820032. Throughput: 0: 10534.4. Samples: 44941266. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:03,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000144021_1179820032.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000142797_1169793024.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:04,093][626795] Updated weights for policy 0, policy_version 144022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:05,952][626795] Updated weights for policy 0, policy_version 144032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:07,876][626795] Updated weights for policy 0, policy_version 144042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:08,976][24592] Fps is (10 sec: 45055.5, 60 sec: 42598.9, 300 sec: 41904.1). Total num frames: 1180041216. Throughput: 0: 10544.4. Samples: 45007272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:08,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:09,741][626795] Updated weights for policy 0, policy_version 144052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:11,615][626795] Updated weights for policy 0, policy_version 144062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:14,772][24592] Fps is (10 sec: 35661.1, 60 sec: 41231.7, 300 sec: 41625.1). Total num frames: 1180205056. Throughput: 0: 10345.3. Samples: 45040326. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:14,773][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:15,483][626795] Updated weights for policy 0, policy_version 144072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:17,182][626795] Updated weights for policy 0, policy_version 144082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:18,975][24592] Fps is (10 sec: 35226.2, 60 sec: 41233.4, 300 sec: 41654.2). Total num frames: 1180393472. Throughput: 0: 10058.4. Samples: 45084840. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:18,976][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:19,008][626795] Updated weights for policy 0, policy_version 144092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:20,933][626795] Updated weights for policy 0, policy_version 144102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:22,811][626795] Updated weights for policy 0, policy_version 144112 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:23,975][24592] Fps is (10 sec: 43615.9, 60 sec: 41096.5, 300 sec: 41626.5). Total num frames: 1180606464. Throughput: 0: 11187.7. Samples: 45151464. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:23,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:24,704][626795] Updated weights for policy 0, policy_version 144122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:26,545][626795] Updated weights for policy 0, policy_version 144132 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:28,305][626795] Updated weights for policy 0, policy_version 144142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:28,976][24592] Fps is (10 sec: 44235.0, 60 sec: 41233.0, 300 sec: 41654.2). Total num frames: 1180835840. Throughput: 0: 10539.9. Samples: 45184548. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:28,977][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:30,233][626795] Updated weights for policy 0, policy_version 144152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:32,018][626795] Updated weights for policy 0, policy_version 144162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:33,975][24592] Fps is (10 sec: 44237.0, 60 sec: 41096.6, 300 sec: 41598.8). Total num frames: 1181048832. Throughput: 0: 10566.6. Samples: 45251124. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:33,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:34,002][626795] Updated weights for policy 0, policy_version 144172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:35,883][626795] Updated weights for policy 0, policy_version 144182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:37,704][626795] Updated weights for policy 0, policy_version 144192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:38,975][24592] Fps is (10 sec: 44238.6, 60 sec: 42743.8, 300 sec: 41931.9). Total num frames: 1181278208. Throughput: 0: 10561.6. Samples: 45316668. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:38,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:39,572][626795] Updated weights for policy 0, policy_version 144202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:41,378][626795] Updated weights for policy 0, policy_version 144212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:43,270][626795] Updated weights for policy 0, policy_version 144222 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:43,975][24592] Fps is (10 sec: 44236.4, 60 sec: 42598.8, 300 sec: 41904.2). Total num frames: 1181491200. Throughput: 0: 10551.3. Samples: 45349746. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:43,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:45,190][626795] Updated weights for policy 0, policy_version 144232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:49,018][24592] Fps is (10 sec: 34260.2, 60 sec: 41067.4, 300 sec: 41592.7). Total num frames: 1181622272. Throughput: 0: 9792.5. Samples: 45382344. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:49,019][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:49,033][626795] Updated weights for policy 0, policy_version 144242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:50,767][626795] Updated weights for policy 0, policy_version 144252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:52,745][626795] Updated weights for policy 0, policy_version 144262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:53,975][24592] Fps is (10 sec: 35225.9, 60 sec: 41233.4, 300 sec: 41598.7). Total num frames: 1181843456. Throughput: 0: 10047.8. Samples: 45459420. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:53,977][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:54,593][626795] Updated weights for policy 0, policy_version 144272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:56,542][626795] Updated weights for policy 0, policy_version 144282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:31:58,261][626795] Updated weights for policy 0, policy_version 144292 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:58,975][24592] Fps is (10 sec: 45249.3, 60 sec: 41369.7, 300 sec: 41626.5). Total num frames: 1182072832. Throughput: 0: 10224.7. Samples: 45492288. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:31:58,976][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:00,251][626795] Updated weights for policy 0, policy_version 144302 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:02,049][626795] Updated weights for policy 0, policy_version 144312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:03,938][626795] Updated weights for policy 0, policy_version 144322 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:03,975][24592] Fps is (10 sec: 44236.6, 60 sec: 41096.5, 300 sec: 41598.7). Total num frames: 1182285824. Throughput: 0: 10516.8. Samples: 45558096. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:03,977][24592] Avg episode reward: [(0, '4.295')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:05,645][626795] Updated weights for policy 0, policy_version 144332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:07,681][626795] Updated weights for policy 0, policy_version 144342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:08,975][24592] Fps is (10 sec: 43416.9, 60 sec: 41096.6, 300 sec: 41866.0). Total num frames: 1182507008. Throughput: 0: 10516.7. Samples: 45624714. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:08,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:09,442][626795] Updated weights for policy 0, policy_version 144352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:11,335][626795] Updated weights for policy 0, policy_version 144362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:13,194][626795] Updated weights for policy 0, policy_version 144372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:13,975][24592] Fps is (10 sec: 44237.0, 60 sec: 42618.2, 300 sec: 41904.2). Total num frames: 1182728192. Throughput: 0: 10523.3. Samples: 45658092. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:13,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:15,052][626795] Updated weights for policy 0, policy_version 144382 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:16,839][626795] Updated weights for policy 0, policy_version 144392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:18,596][626795] Updated weights for policy 0, policy_version 144402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:18,976][24592] Fps is (10 sec: 44236.4, 60 sec: 42598.2, 300 sec: 41876.4). Total num frames: 1182949376. Throughput: 0: 10516.6. Samples: 45724374. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:18,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:20,524][626795] Updated weights for policy 0, policy_version 144412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:23,975][24592] Fps is (10 sec: 35225.4, 60 sec: 41233.1, 300 sec: 41570.9). Total num frames: 1183080448. Throughput: 0: 10048.4. Samples: 45768846. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:23,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:24,435][626795] Updated weights for policy 0, policy_version 144422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:26,320][626795] Updated weights for policy 0, policy_version 144432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:28,050][626795] Updated weights for policy 0, policy_version 144442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:28,975][24592] Fps is (10 sec: 35226.3, 60 sec: 41096.8, 300 sec: 41570.9). Total num frames: 1183301632. Throughput: 0: 10045.5. Samples: 45801792. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:28,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:29,996][626795] Updated weights for policy 0, policy_version 144452 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:31,781][626795] Updated weights for policy 0, policy_version 144462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:33,638][626795] Updated weights for policy 0, policy_version 144472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:33,976][24592] Fps is (10 sec: 44236.0, 60 sec: 41232.9, 300 sec: 41570.9). Total num frames: 1183522816. Throughput: 0: 10812.6. Samples: 45868452. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:33,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:35,476][626795] Updated weights for policy 0, policy_version 144482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:37,376][626795] Updated weights for policy 0, policy_version 144492 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:38,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41096.5, 300 sec: 41570.9). Total num frames: 1183744000. Throughput: 0: 10566.1. Samples: 45934896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:38,978][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:39,171][626795] Updated weights for policy 0, policy_version 144502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:41,064][626795] Updated weights for policy 0, policy_version 144512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:42,895][626795] Updated weights for policy 0, policy_version 144522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:43,975][24592] Fps is (10 sec: 45057.3, 60 sec: 41369.7, 300 sec: 41884.3). Total num frames: 1183973376. Throughput: 0: 10574.3. Samples: 45968130. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:43,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:44,608][626795] Updated weights for policy 0, policy_version 144532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:46,420][626795] Updated weights for policy 0, policy_version 144542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:48,355][626795] Updated weights for policy 0, policy_version 144552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:48,975][24592] Fps is (10 sec: 44236.8, 60 sec: 42765.3, 300 sec: 41848.6). Total num frames: 1184186368. Throughput: 0: 10601.3. Samples: 46035156. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:48,977][24592] Avg episode reward: [(0, '4.337')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:50,215][626795] Updated weights for policy 0, policy_version 144562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:52,141][626795] Updated weights for policy 0, policy_version 144572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:53,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42734.9, 300 sec: 41848.6). Total num frames: 1184407552. Throughput: 0: 10593.6. Samples: 46101426. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:53,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:53,993][626795] Updated weights for policy 0, policy_version 144582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:55,671][626795] Updated weights for policy 0, policy_version 144592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:57,636][626795] Updated weights for policy 0, policy_version 144602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:58,976][24592] Fps is (10 sec: 44234.4, 60 sec: 42598.0, 300 sec: 41848.5). Total num frames: 1184628736. Throughput: 0: 10592.7. Samples: 46134768. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:32:58,977][24592] Avg episode reward: [(0, '4.310')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:32:59,415][626795] Updated weights for policy 0, policy_version 144612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:01,768][626795] Updated weights for policy 0, policy_version 144622 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:03,602][626795] Updated weights for policy 0, policy_version 144632 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:03,975][24592] Fps is (10 sec: 42597.8, 60 sec: 42461.8, 300 sec: 41820.8). Total num frames: 1184833536. Throughput: 0: 10488.2. Samples: 46196340. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:03,977][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:04,003][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000144634_1184841728.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000143406_1174781952.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:05,483][626795] Updated weights for policy 0, policy_version 144642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:07,278][626795] Updated weights for policy 0, policy_version 144652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:08,957][626795] Updated weights for policy 0, policy_version 144662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:08,975][24592] Fps is (10 sec: 44239.2, 60 sec: 42735.0, 300 sec: 41848.6). Total num frames: 1185071104. Throughput: 0: 11024.9. Samples: 46264968. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:08,977][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:10,709][626795] Updated weights for policy 0, policy_version 144672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:13,976][24592] Fps is (10 sec: 32767.5, 60 sec: 40550.2, 300 sec: 41404.3). Total num frames: 1185161216. Throughput: 0: 10854.6. Samples: 46290252. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:13,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:15,520][626795] Updated weights for policy 0, policy_version 144682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:17,321][626795] Updated weights for policy 0, policy_version 144692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:18,975][24592] Fps is (10 sec: 31948.4, 60 sec: 40687.0, 300 sec: 41709.8). Total num frames: 1185390592. Throughput: 0: 10345.4. Samples: 46333992. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:18,983][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:19,133][626795] Updated weights for policy 0, policy_version 144702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:20,903][626795] Updated weights for policy 0, policy_version 144712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:22,678][626795] Updated weights for policy 0, policy_version 144722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:23,976][24592] Fps is (10 sec: 45056.2, 60 sec: 42188.7, 300 sec: 41709.8). Total num frames: 1185611776. Throughput: 0: 10376.1. Samples: 46401822. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:23,977][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:24,535][626795] Updated weights for policy 0, policy_version 144732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:26,259][626795] Updated weights for policy 0, policy_version 144742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:28,048][626795] Updated weights for policy 0, policy_version 144752 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:28,975][24592] Fps is (10 sec: 45056.6, 60 sec: 42325.3, 300 sec: 41737.5). Total num frames: 1185841152. Throughput: 0: 10409.5. Samples: 46436556. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:28,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:29,842][626795] Updated weights for policy 0, policy_version 144762 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:31,613][626795] Updated weights for policy 0, policy_version 144772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:33,368][626795] Updated weights for policy 0, policy_version 144782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:33,975][24592] Fps is (10 sec: 46695.4, 60 sec: 42598.6, 300 sec: 41765.3). Total num frames: 1186078720. Throughput: 0: 10440.9. Samples: 46504998. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:33,977][24592] Avg episode reward: [(0, '4.262')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:35,230][626795] Updated weights for policy 0, policy_version 144792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:36,991][626795] Updated weights for policy 0, policy_version 144802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:38,832][626795] Updated weights for policy 0, policy_version 144812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:38,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42734.9, 300 sec: 41820.9). Total num frames: 1186308096. Throughput: 0: 10499.6. Samples: 46573908. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:38,976][24592] Avg episode reward: [(0, '4.297')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:40,642][626795] Updated weights for policy 0, policy_version 144822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:42,403][626795] Updated weights for policy 0, policy_version 144832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:43,975][24592] Fps is (10 sec: 45055.6, 60 sec: 42598.3, 300 sec: 41793.1). Total num frames: 1186529280. Throughput: 0: 10525.2. Samples: 46608396. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:43,977][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:44,164][626795] Updated weights for policy 0, policy_version 144842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:47,939][626795] Updated weights for policy 0, policy_version 144852 (0.1996)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:48,976][24592] Fps is (10 sec: 36044.4, 60 sec: 41369.5, 300 sec: 41515.4). Total num frames: 1186668544. Throughput: 0: 10268.1. Samples: 46658406. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:48,977][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:49,812][626795] Updated weights for policy 0, policy_version 144862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:51,518][626795] Updated weights for policy 0, policy_version 144872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:53,296][626795] Updated weights for policy 0, policy_version 144882 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:53,975][24592] Fps is (10 sec: 36864.5, 60 sec: 41506.2, 300 sec: 41848.6). Total num frames: 1186897920. Throughput: 0: 10180.9. Samples: 46723110. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:53,976][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:55,068][626795] Updated weights for policy 0, policy_version 144892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:56,836][626795] Updated weights for policy 0, policy_version 144902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:33:58,599][626795] Updated weights for policy 0, policy_version 144912 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:58,975][24592] Fps is (10 sec: 45875.4, 60 sec: 41643.0, 300 sec: 41876.4). Total num frames: 1187127296. Throughput: 0: 10388.5. Samples: 46757730. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:33:58,977][24592] Avg episode reward: [(0, '4.273')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:00,496][626795] Updated weights for policy 0, policy_version 144922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:02,138][626795] Updated weights for policy 0, policy_version 144932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:03,975][24592] Fps is (10 sec: 45874.9, 60 sec: 42052.3, 300 sec: 41904.2). Total num frames: 1187356672. Throughput: 0: 10947.5. Samples: 46826628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:03,976][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:04,057][626795] Updated weights for policy 0, policy_version 144942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:05,790][626795] Updated weights for policy 0, policy_version 144952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:07,587][626795] Updated weights for policy 0, policy_version 144962 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:08,976][24592] Fps is (10 sec: 45874.2, 60 sec: 41915.5, 300 sec: 41931.9). Total num frames: 1187586048. Throughput: 0: 10962.4. Samples: 46895130. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:08,976][24592] Avg episode reward: [(0, '4.153')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:09,400][626795] Updated weights for policy 0, policy_version 144972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:11,189][626795] Updated weights for policy 0, policy_version 144982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:12,908][626795] Updated weights for policy 0, policy_version 144992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:13,975][24592] Fps is (10 sec: 45874.9, 60 sec: 44237.0, 300 sec: 41959.7). Total num frames: 1187815424. Throughput: 0: 10950.9. Samples: 46929348. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:13,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:14,602][626795] Updated weights for policy 0, policy_version 145002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:16,370][626795] Updated weights for policy 0, policy_version 145012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:18,072][626795] Updated weights for policy 0, policy_version 145022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:18,975][24592] Fps is (10 sec: 47514.9, 60 sec: 44510.0, 300 sec: 42043.0). Total num frames: 1188061184. Throughput: 0: 11007.6. Samples: 47000340. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:18,976][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:19,747][626795] Updated weights for policy 0, policy_version 145032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:23,975][24592] Fps is (10 sec: 36044.6, 60 sec: 42735.0, 300 sec: 41682.0). Total num frames: 1188175872. Throughput: 0: 10433.4. Samples: 47043414. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:23,977][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:23,987][626795] Updated weights for policy 0, policy_version 145042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:25,760][626795] Updated weights for policy 0, policy_version 145052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:27,623][626795] Updated weights for policy 0, policy_version 145062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:28,976][24592] Fps is (10 sec: 34404.6, 60 sec: 42734.6, 300 sec: 41987.4). Total num frames: 1188405248. Throughput: 0: 10398.2. Samples: 47076318. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:28,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:29,552][626795] Updated weights for policy 0, policy_version 145072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:31,282][626795] Updated weights for policy 0, policy_version 145082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:33,066][626795] Updated weights for policy 0, policy_version 145092 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:33,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42598.4, 300 sec: 42015.2). Total num frames: 1188634624. Throughput: 0: 10798.0. Samples: 47144316. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:33,977][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:34,765][626795] Updated weights for policy 0, policy_version 145102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:36,616][626795] Updated weights for policy 0, policy_version 145112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:36,815][626772] Signal inference workers to stop experience collection... (700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:36,818][626772] Signal inference workers to resume experience collection... (700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:36,830][626795] InferenceWorker_p0-w0: stopping experience collection (700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:36,832][626795] InferenceWorker_p0-w0: resuming experience collection (700 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:38,329][626795] Updated weights for policy 0, policy_version 145122 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:38,978][24592] Fps is (10 sec: 45864.7, 60 sec: 42596.4, 300 sec: 42042.6). Total num frames: 1188864000. Throughput: 0: 10907.6. Samples: 47213982. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:38,979][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:40,086][626795] Updated weights for policy 0, policy_version 145132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:41,811][626795] Updated weights for policy 0, policy_version 145142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:43,488][626795] Updated weights for policy 0, policy_version 145152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42871.5, 300 sec: 42098.6). Total num frames: 1189101568. Throughput: 0: 10938.0. Samples: 47249940. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:43,977][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:45,219][626795] Updated weights for policy 0, policy_version 145162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:46,936][626795] Updated weights for policy 0, policy_version 145172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:48,755][626795] Updated weights for policy 0, policy_version 145182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:48,975][24592] Fps is (10 sec: 47526.8, 60 sec: 44509.9, 300 sec: 42154.1). Total num frames: 1189339136. Throughput: 0: 10976.7. Samples: 47320578. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:48,978][24592] Avg episode reward: [(0, '4.319')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:50,569][626795] Updated weights for policy 0, policy_version 145192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:52,250][626795] Updated weights for policy 0, policy_version 145202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:53,976][24592] Fps is (10 sec: 45874.6, 60 sec: 44373.2, 300 sec: 42154.1). Total num frames: 1189560320. Throughput: 0: 10980.7. Samples: 47389260. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:53,978][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:54,205][626795] Updated weights for policy 0, policy_version 145212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:58,143][626795] Updated weights for policy 0, policy_version 145222 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:58,975][24592] Fps is (10 sec: 36045.2, 60 sec: 42871.6, 300 sec: 41848.6). Total num frames: 1189699584. Throughput: 0: 10536.3. Samples: 47403480. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:34:58,976][24592] Avg episode reward: [(0, '4.375')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:34:59,911][626795] Updated weights for policy 0, policy_version 145232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:01,666][626795] Updated weights for policy 0, policy_version 145242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:03,468][626795] Updated weights for policy 0, policy_version 145252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:03,975][24592] Fps is (10 sec: 36045.1, 60 sec: 42734.9, 300 sec: 42154.2). Total num frames: 1189920768. Throughput: 0: 10384.0. Samples: 47467620. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:03,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000145254_1189920768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:04,049][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000144021_1179820032.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:05,188][626795] Updated weights for policy 0, policy_version 145262 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:06,995][626795] Updated weights for policy 0, policy_version 145272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:08,626][626795] Updated weights for policy 0, policy_version 145282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:08,976][24592] Fps is (10 sec: 45873.4, 60 sec: 42871.4, 300 sec: 42237.4). Total num frames: 1190158336. Throughput: 0: 10993.6. Samples: 47538126. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:08,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:10,371][626795] Updated weights for policy 0, policy_version 145292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:12,035][626795] Updated weights for policy 0, policy_version 145302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:13,709][626795] Updated weights for policy 0, policy_version 145312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:13,976][24592] Fps is (10 sec: 48331.3, 60 sec: 43144.3, 300 sec: 42320.7). Total num frames: 1190404096. Throughput: 0: 11066.8. Samples: 47574324. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:13,976][24592] Avg episode reward: [(0, '4.362')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:15,455][626795] Updated weights for policy 0, policy_version 145322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:17,141][626795] Updated weights for policy 0, policy_version 145332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:18,857][626795] Updated weights for policy 0, policy_version 145342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:18,975][24592] Fps is (10 sec: 48333.9, 60 sec: 43007.9, 300 sec: 42376.2). Total num frames: 1190641664. Throughput: 0: 11157.3. Samples: 47646396. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:18,977][24592] Avg episode reward: [(0, '4.251')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:20,578][626795] Updated weights for policy 0, policy_version 145352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:22,324][626795] Updated weights for policy 0, policy_version 145362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:23,975][24592] Fps is (10 sec: 47515.3, 60 sec: 45056.1, 300 sec: 42431.8). Total num frames: 1190879232. Throughput: 0: 11181.2. Samples: 47717106. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:23,976][24592] Avg episode reward: [(0, '4.414')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:24,131][626795] Updated weights for policy 0, policy_version 145372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:25,910][626795] Updated weights for policy 0, policy_version 145382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:27,667][626795] Updated weights for policy 0, policy_version 145392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:28,975][24592] Fps is (10 sec: 46694.7, 60 sec: 45056.4, 300 sec: 42459.6). Total num frames: 1191108608. Throughput: 0: 11141.2. Samples: 47751294. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:28,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:32,241][626795] Updated weights for policy 0, policy_version 145402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:33,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42871.5, 300 sec: 42348.2). Total num frames: 1191206912. Throughput: 0: 10405.7. Samples: 47788836. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:33,977][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:34,091][626795] Updated weights for policy 0, policy_version 145412 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:36,032][626795] Updated weights for policy 0, policy_version 145422 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:37,907][626795] Updated weights for policy 0, policy_version 145432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:38,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42600.4, 300 sec: 42320.8). Total num frames: 1191419904. Throughput: 0: 10320.0. Samples: 47853660. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:38,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:39,817][626795] Updated weights for policy 0, policy_version 145442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:41,529][626795] Updated weights for policy 0, policy_version 145452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:43,193][626795] Updated weights for policy 0, policy_version 145462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:43,978][24592] Fps is (10 sec: 45046.4, 60 sec: 42596.9, 300 sec: 42375.9). Total num frames: 1191657472. Throughput: 0: 10765.6. Samples: 47887956. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:43,978][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:44,944][626795] Updated weights for policy 0, policy_version 145472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:46,683][626795] Updated weights for policy 0, policy_version 145482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:48,326][626795] Updated weights for policy 0, policy_version 145492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:48,976][24592] Fps is (10 sec: 47512.8, 60 sec: 42598.3, 300 sec: 42459.6). Total num frames: 1191895040. Throughput: 0: 10940.5. Samples: 47959944. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:48,977][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:50,035][626795] Updated weights for policy 0, policy_version 145502 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:51,760][626795] Updated weights for policy 0, policy_version 145512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:53,482][626795] Updated weights for policy 0, policy_version 145522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:53,975][24592] Fps is (10 sec: 47524.2, 60 sec: 42871.6, 300 sec: 42515.1). Total num frames: 1192132608. Throughput: 0: 10969.4. Samples: 48031746. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:53,978][24592] Avg episode reward: [(0, '4.262')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:55,242][626795] Updated weights for policy 0, policy_version 145532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:56,790][626795] Updated weights for policy 0, policy_version 145542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:35:58,639][626795] Updated weights for policy 0, policy_version 145552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:58,975][24592] Fps is (10 sec: 48333.7, 60 sec: 44646.4, 300 sec: 42570.6). Total num frames: 1192378368. Throughput: 0: 10977.8. Samples: 48068322. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:35:58,977][24592] Avg episode reward: [(0, '4.300')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:00,338][626795] Updated weights for policy 0, policy_version 145562 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:02,011][626795] Updated weights for policy 0, policy_version 145572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:03,813][626795] Updated weights for policy 0, policy_version 145582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:03,975][24592] Fps is (10 sec: 48332.8, 60 sec: 44919.6, 300 sec: 42626.2). Total num frames: 1192615936. Throughput: 0: 10956.6. Samples: 48139440. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:03,976][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:08,196][626795] Updated weights for policy 0, policy_version 145592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:08,975][24592] Fps is (10 sec: 33586.8, 60 sec: 42598.5, 300 sec: 42518.8). Total num frames: 1192714240. Throughput: 0: 10244.7. Samples: 48178116. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:08,976][24592] Avg episode reward: [(0, '4.367')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:09,938][626795] Updated weights for policy 0, policy_version 145602 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:11,887][626795] Updated weights for policy 0, policy_version 145612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:13,736][626795] Updated weights for policy 0, policy_version 145622 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:13,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42325.6, 300 sec: 42542.9). Total num frames: 1192943616. Throughput: 0: 10216.3. Samples: 48211026. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:13,976][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:15,585][626795] Updated weights for policy 0, policy_version 145632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:17,300][626795] Updated weights for policy 0, policy_version 145642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:18,932][626795] Updated weights for policy 0, policy_version 145652 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:18,975][24592] Fps is (10 sec: 46695.0, 60 sec: 42325.4, 300 sec: 42626.2). Total num frames: 1193181184. Throughput: 0: 10918.7. Samples: 48280176. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:18,976][24592] Avg episode reward: [(0, '4.356')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:20,678][626795] Updated weights for policy 0, policy_version 145662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:22,336][626795] Updated weights for policy 0, policy_version 145672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:23,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.4, 300 sec: 42654.0). Total num frames: 1193418752. Throughput: 0: 11066.8. Samples: 48351666. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:23,977][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:24,125][626795] Updated weights for policy 0, policy_version 145682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:25,794][626795] Updated weights for policy 0, policy_version 145692 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:27,433][626795] Updated weights for policy 0, policy_version 145702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:28,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42461.9, 300 sec: 42737.2). Total num frames: 1193656320. Throughput: 0: 11116.4. Samples: 48388170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:28,976][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:29,197][626795] Updated weights for policy 0, policy_version 145712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:30,909][626795] Updated weights for policy 0, policy_version 145722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:32,666][626795] Updated weights for policy 0, policy_version 145732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:33,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44783.0, 300 sec: 42765.0). Total num frames: 1193893888. Throughput: 0: 11107.8. Samples: 48459792. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:33,978][24592] Avg episode reward: [(0, '4.299')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:34,375][626795] Updated weights for policy 0, policy_version 145742 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:36,201][626795] Updated weights for policy 0, policy_version 145752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:37,905][626795] Updated weights for policy 0, policy_version 145762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45192.5, 300 sec: 42848.3). Total num frames: 1194131456. Throughput: 0: 11074.4. Samples: 48530094. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:38,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:42,317][626795] Updated weights for policy 0, policy_version 145772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:43,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42873.0, 300 sec: 42743.4). Total num frames: 1194229760. Throughput: 0: 10386.1. Samples: 48535698. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:43,977][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:44,328][626795] Updated weights for policy 0, policy_version 145782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:46,116][626795] Updated weights for policy 0, policy_version 145792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:47,869][626795] Updated weights for policy 0, policy_version 145802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:48,976][24592] Fps is (10 sec: 31947.6, 60 sec: 42598.3, 300 sec: 42737.2). Total num frames: 1194450944. Throughput: 0: 10243.9. Samples: 48600420. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:48,977][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:49,803][626795] Updated weights for policy 0, policy_version 145812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:51,410][626795] Updated weights for policy 0, policy_version 145822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:53,125][626795] Updated weights for policy 0, policy_version 145832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:53,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42734.9, 300 sec: 42792.8). Total num frames: 1194696704. Throughput: 0: 10949.0. Samples: 48670818. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:53,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:54,845][626795] Updated weights for policy 0, policy_version 145842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:56,526][626795] Updated weights for policy 0, policy_version 145852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:58,257][626795] Updated weights for policy 0, policy_version 145862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:58,975][24592] Fps is (10 sec: 48334.2, 60 sec: 42598.3, 300 sec: 42876.1). Total num frames: 1194934272. Throughput: 0: 11018.4. Samples: 48706854. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:36:58,976][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:36:59,987][626795] Updated weights for policy 0, policy_version 145872 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:01,738][626795] Updated weights for policy 0, policy_version 145882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:03,391][626795] Updated weights for policy 0, policy_version 145892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:03,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.4, 300 sec: 42931.7). Total num frames: 1195171840. Throughput: 0: 11075.1. Samples: 48778554. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:03,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000145895_1195171840.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000144634_1184841728.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:05,106][626795] Updated weights for policy 0, policy_version 145902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:06,847][626795] Updated weights for policy 0, policy_version 145912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:08,654][626795] Updated weights for policy 0, policy_version 145922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:08,976][24592] Fps is (10 sec: 47513.6, 60 sec: 44919.5, 300 sec: 42987.2). Total num frames: 1195409408. Throughput: 0: 11054.9. Samples: 48849138. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:08,976][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:10,438][626795] Updated weights for policy 0, policy_version 145932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:12,136][626795] Updated weights for policy 0, policy_version 145942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:13,884][626795] Updated weights for policy 0, policy_version 145952 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44919.5, 300 sec: 43015.0). Total num frames: 1195638784. Throughput: 0: 11029.6. Samples: 48884502. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:13,976][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:18,426][626795] Updated weights for policy 0, policy_version 145962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:18,975][24592] Fps is (10 sec: 32768.2, 60 sec: 42598.4, 300 sec: 42903.9). Total num frames: 1195737088. Throughput: 0: 10281.5. Samples: 48922458. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:18,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:20,256][626795] Updated weights for policy 0, policy_version 145972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:22,083][626795] Updated weights for policy 0, policy_version 145982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:23,972][626795] Updated weights for policy 0, policy_version 145992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:23,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1195966464. Throughput: 0: 10187.5. Samples: 48988530. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:23,978][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:25,753][626795] Updated weights for policy 0, policy_version 146002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:27,420][626795] Updated weights for policy 0, policy_version 146012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:28,976][24592] Fps is (10 sec: 45875.4, 60 sec: 42325.4, 300 sec: 42959.4). Total num frames: 1196195840. Throughput: 0: 10841.9. Samples: 49023582. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:28,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:29,123][626795] Updated weights for policy 0, policy_version 146022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:30,872][626795] Updated weights for policy 0, policy_version 146032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:32,608][626795] Updated weights for policy 0, policy_version 146042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:33,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42325.3, 300 sec: 43014.9). Total num frames: 1196433408. Throughput: 0: 10980.7. Samples: 49094550. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:33,976][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:34,310][626795] Updated weights for policy 0, policy_version 146052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:36,104][626795] Updated weights for policy 0, policy_version 146062 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:37,804][626795] Updated weights for policy 0, policy_version 146072 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:38,976][24592] Fps is (10 sec: 47510.6, 60 sec: 42324.9, 300 sec: 43042.6). Total num frames: 1196670976. Throughput: 0: 11019.1. Samples: 49166682. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:38,977][24592] Avg episode reward: [(0, '4.441')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:39,472][626795] Updated weights for policy 0, policy_version 146082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:41,192][626795] Updated weights for policy 0, policy_version 146092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:42,878][626795] Updated weights for policy 0, policy_version 146102 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:43,976][24592] Fps is (10 sec: 48332.9, 60 sec: 44782.9, 300 sec: 43153.8). Total num frames: 1196916736. Throughput: 0: 11008.9. Samples: 49202256. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:43,977][24592] Avg episode reward: [(0, '4.358')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:44,618][626795] Updated weights for policy 0, policy_version 146112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:46,422][626795] Updated weights for policy 0, policy_version 146122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:48,072][626795] Updated weights for policy 0, policy_version 146132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:51,537][24592] Fps is (10 sec: 37826.2, 60 sec: 43080.5, 300 sec: 42809.8). Total num frames: 1197146112. Throughput: 0: 10400.9. Samples: 49273236. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:51,539][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:52,696][626795] Updated weights for policy 0, policy_version 146142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:53,975][24592] Fps is (10 sec: 33587.4, 60 sec: 42598.4, 300 sec: 42792.9). Total num frames: 1197252608. Throughput: 0: 10261.1. Samples: 49310886. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:53,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:54,449][626795] Updated weights for policy 0, policy_version 146152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:56,420][626795] Updated weights for policy 0, policy_version 146162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:37:58,215][626795] Updated weights for policy 0, policy_version 146172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:58,977][24592] Fps is (10 sec: 44045.1, 60 sec: 42324.5, 300 sec: 42848.2). Total num frames: 1197473792. Throughput: 0: 10202.8. Samples: 49343640. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:37:58,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:00,029][626795] Updated weights for policy 0, policy_version 146182 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:01,772][626795] Updated weights for policy 0, policy_version 146192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:03,601][626795] Updated weights for policy 0, policy_version 146202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:03,975][24592] Fps is (10 sec: 45055.8, 60 sec: 42188.7, 300 sec: 42820.5). Total num frames: 1197703168. Throughput: 0: 10870.5. Samples: 49411632. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:03,976][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:05,449][626795] Updated weights for policy 0, policy_version 146212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:07,201][626795] Updated weights for policy 0, policy_version 146222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:08,910][626795] Updated weights for policy 0, policy_version 146232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:08,976][24592] Fps is (10 sec: 45878.4, 60 sec: 42051.9, 300 sec: 43292.6). Total num frames: 1197932544. Throughput: 0: 10931.6. Samples: 49480458. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:08,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:10,704][626795] Updated weights for policy 0, policy_version 146242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:12,368][626795] Updated weights for policy 0, policy_version 146252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:13,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42052.3, 300 sec: 43292.7). Total num frames: 1198161920. Throughput: 0: 10925.7. Samples: 49515240. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:13,976][24592] Avg episode reward: [(0, '4.354')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:14,247][626795] Updated weights for policy 0, policy_version 146262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:16,097][626795] Updated weights for policy 0, policy_version 146272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:17,800][626795] Updated weights for policy 0, policy_version 146282 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:18,975][24592] Fps is (10 sec: 45877.8, 60 sec: 44236.8, 300 sec: 43320.4). Total num frames: 1198391296. Throughput: 0: 10877.6. Samples: 49584042. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:18,976][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:19,649][626795] Updated weights for policy 0, policy_version 146292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:21,477][626795] Updated weights for policy 0, policy_version 146302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:23,141][626795] Updated weights for policy 0, policy_version 146312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:25,534][24592] Fps is (10 sec: 38271.1, 60 sec: 42850.5, 300 sec: 43037.5). Total num frames: 1198604288. Throughput: 0: 10436.9. Samples: 49652604. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:25,537][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:26,789][626795] Updated weights for policy 0, policy_version 146322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:28,499][626795] Updated weights for policy 0, policy_version 146332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:28,975][24592] Fps is (10 sec: 37682.8, 60 sec: 42871.4, 300 sec: 43014.9). Total num frames: 1198768128. Throughput: 0: 10330.9. Samples: 49667148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:28,977][24592] Avg episode reward: [(0, '4.172')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:30,283][626795] Updated weights for policy 0, policy_version 146342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:32,080][626795] Updated weights for policy 0, policy_version 146352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:33,848][626795] Updated weights for policy 0, policy_version 146362 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:33,975][24592] Fps is (10 sec: 46582.6, 60 sec: 42735.0, 300 sec: 43014.9). Total num frames: 1198997504. Throughput: 0: 10908.1. Samples: 49736160. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:33,977][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:35,582][626795] Updated weights for policy 0, policy_version 146372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:37,492][626795] Updated weights for policy 0, policy_version 146382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:38,976][24592] Fps is (10 sec: 45874.1, 60 sec: 42598.6, 300 sec: 43042.7). Total num frames: 1199226880. Throughput: 0: 10984.7. Samples: 49805202. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:38,977][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:39,198][626795] Updated weights for policy 0, policy_version 146392 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:40,974][626795] Updated weights for policy 0, policy_version 146402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:42,661][626795] Updated weights for policy 0, policy_version 146412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:43,976][24592] Fps is (10 sec: 46693.5, 60 sec: 42461.8, 300 sec: 43375.9). Total num frames: 1199464448. Throughput: 0: 11018.0. Samples: 49839438. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:43,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:44,434][626795] Updated weights for policy 0, policy_version 146422 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:46,104][626795] Updated weights for policy 0, policy_version 146432 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:47,854][626795] Updated weights for policy 0, policy_version 146442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:48,976][24592] Fps is (10 sec: 47512.1, 60 sec: 44497.7, 300 sec: 43403.6). Total num frames: 1199702016. Throughput: 0: 11116.0. Samples: 49911858. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:48,977][24592] Avg episode reward: [(0, '4.315')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:49,601][626795] Updated weights for policy 0, policy_version 146452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:50,964][626772] Signal inference workers to stop experience collection... (750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:50,965][626772] Signal inference workers to resume experience collection... (750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:50,973][626795] InferenceWorker_p0-w0: stopping experience collection (750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:50,976][626795] InferenceWorker_p0-w0: resuming experience collection (750 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:51,184][626795] Updated weights for policy 0, policy_version 146462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:52,938][626795] Updated weights for policy 0, policy_version 146472 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:53,975][24592] Fps is (10 sec: 47514.7, 60 sec: 44783.0, 300 sec: 43431.5). Total num frames: 1199939584. Throughput: 0: 11176.8. Samples: 49983408. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:38:53,977][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:54,717][626795] Updated weights for policy 0, policy_version 146482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:38:56,312][626795] Updated weights for policy 0, policy_version 146492 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:00,583][24592] Fps is (10 sec: 35996.1, 60 sec: 42950.6, 300 sec: 43030.4). Total num frames: 1200119808. Throughput: 0: 10819.6. Samples: 50019510. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:00,584][24592] Avg episode reward: [(0, '4.212')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:01,006][626795] Updated weights for policy 0, policy_version 146502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:02,752][626795] Updated weights for policy 0, policy_version 146512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:03,976][24592] Fps is (10 sec: 33586.7, 60 sec: 42871.4, 300 sec: 43015.0). Total num frames: 1200275456. Throughput: 0: 10506.5. Samples: 50056836. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:03,978][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000146518_1200275456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:04,079][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000145254_1189920768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:04,502][626795] Updated weights for policy 0, policy_version 146522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:06,438][626795] Updated weights for policy 0, policy_version 146532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:08,241][626795] Updated weights for policy 0, policy_version 146542 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:08,976][24592] Fps is (10 sec: 45871.6, 60 sec: 42871.3, 300 sec: 43014.8). Total num frames: 1200504832. Throughput: 0: 10860.8. Samples: 50124420. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:08,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:09,928][626795] Updated weights for policy 0, policy_version 146552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:11,582][626795] Updated weights for policy 0, policy_version 146562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:13,355][626795] Updated weights for policy 0, policy_version 146572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:13,976][24592] Fps is (10 sec: 46694.3, 60 sec: 43007.9, 300 sec: 42987.1). Total num frames: 1200742400. Throughput: 0: 10952.9. Samples: 50160030. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:13,978][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:15,043][626795] Updated weights for policy 0, policy_version 146582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:16,670][626795] Updated weights for policy 0, policy_version 146592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:18,375][626795] Updated weights for policy 0, policy_version 146602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:18,975][24592] Fps is (10 sec: 48336.6, 60 sec: 43281.1, 300 sec: 43431.5). Total num frames: 1200988160. Throughput: 0: 11027.1. Samples: 50232378. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:18,976][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:20,142][626795] Updated weights for policy 0, policy_version 146612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:21,820][626795] Updated weights for policy 0, policy_version 146622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:23,488][626795] Updated weights for policy 0, policy_version 146632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:23,977][24592] Fps is (10 sec: 48327.4, 60 sec: 44855.0, 300 sec: 43459.1). Total num frames: 1201225728. Throughput: 0: 11105.4. Samples: 50304954. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:23,978][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:25,259][626795] Updated weights for policy 0, policy_version 146642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:27,099][626795] Updated weights for policy 0, policy_version 146652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:28,941][626795] Updated weights for policy 0, policy_version 146662 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:28,976][24592] Fps is (10 sec: 46693.5, 60 sec: 44782.9, 300 sec: 43459.2). Total num frames: 1201455104. Throughput: 0: 11112.5. Samples: 50339502. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:28,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:30,602][626795] Updated weights for policy 0, policy_version 146672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:32,425][626795] Updated weights for policy 0, policy_version 146682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:35,515][24592] Fps is (10 sec: 35498.0, 60 sec: 42863.6, 300 sec: 43068.2). Total num frames: 1201635328. Throughput: 0: 10654.6. Samples: 50407716. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:35,517][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:37,050][626795] Updated weights for policy 0, policy_version 146692 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:38,970][626795] Updated weights for policy 0, policy_version 146702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:38,977][24592] Fps is (10 sec: 32764.7, 60 sec: 42597.8, 300 sec: 42987.0). Total num frames: 1201782784. Throughput: 0: 10211.9. Samples: 50442954. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:38,978][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:40,788][626795] Updated weights for policy 0, policy_version 146712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:42,518][626795] Updated weights for policy 0, policy_version 146722 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:43,975][24592] Fps is (10 sec: 44542.5, 60 sec: 42462.0, 300 sec: 42959.4). Total num frames: 1202012160. Throughput: 0: 10543.9. Samples: 50477040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:43,977][24592] Avg episode reward: [(0, '4.197')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:44,328][626795] Updated weights for policy 0, policy_version 146732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:45,912][626795] Updated weights for policy 0, policy_version 146742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:47,702][626795] Updated weights for policy 0, policy_version 146752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:48,976][24592] Fps is (10 sec: 46699.4, 60 sec: 42462.2, 300 sec: 43015.0). Total num frames: 1202249728. Throughput: 0: 10919.3. Samples: 50548206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:48,979][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:49,454][626795] Updated weights for policy 0, policy_version 146762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:51,150][626795] Updated weights for policy 0, policy_version 146772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:52,795][626795] Updated weights for policy 0, policy_version 146782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42461.8, 300 sec: 43348.2). Total num frames: 1202487296. Throughput: 0: 11030.7. Samples: 50620794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:53,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:54,479][626795] Updated weights for policy 0, policy_version 146792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:56,331][626795] Updated weights for policy 0, policy_version 146802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:57,926][626795] Updated weights for policy 0, policy_version 146812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:58,976][24592] Fps is (10 sec: 48331.3, 60 sec: 44752.6, 300 sec: 43431.4). Total num frames: 1202733056. Throughput: 0: 11023.0. Samples: 50656068. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:39:58,977][24592] Avg episode reward: [(0, '4.379')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:39:59,649][626795] Updated weights for policy 0, policy_version 146822 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:01,570][626795] Updated weights for policy 0, policy_version 146832 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:03,197][626795] Updated weights for policy 0, policy_version 146842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:03,976][24592] Fps is (10 sec: 47512.4, 60 sec: 44782.8, 300 sec: 43403.7). Total num frames: 1202962432. Throughput: 0: 10975.8. Samples: 50726292. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:03,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:04,972][626795] Updated weights for policy 0, policy_version 146852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:06,798][626795] Updated weights for policy 0, policy_version 146862 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:10,302][24592] Fps is (10 sec: 35439.7, 60 sec: 42879.4, 300 sec: 42960.6). Total num frames: 1203134464. Throughput: 0: 9845.6. Samples: 50761056. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:10,303][24592] Avg episode reward: [(0, '4.342')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:11,067][626795] Updated weights for policy 0, policy_version 146872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:12,988][626795] Updated weights for policy 0, policy_version 146882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:13,975][24592] Fps is (10 sec: 33588.1, 60 sec: 42598.5, 300 sec: 42903.9). Total num frames: 1203298304. Throughput: 0: 10237.4. Samples: 50800182. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:13,977][24592] Avg episode reward: [(0, '4.364')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:14,761][626795] Updated weights for policy 0, policy_version 146892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:16,645][626795] Updated weights for policy 0, policy_version 146902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:18,379][626795] Updated weights for policy 0, policy_version 146912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:18,975][24592] Fps is (10 sec: 45337.6, 60 sec: 42325.3, 300 sec: 42876.1). Total num frames: 1203527680. Throughput: 0: 10594.0. Samples: 50868132. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:18,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:20,082][626795] Updated weights for policy 0, policy_version 146922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:21,776][626795] Updated weights for policy 0, policy_version 146932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:23,427][626795] Updated weights for policy 0, policy_version 146942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:23,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42462.8, 300 sec: 42931.6). Total num frames: 1203773440. Throughput: 0: 11046.3. Samples: 50940024. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:23,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:25,169][626795] Updated weights for policy 0, policy_version 146952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:26,960][626795] Updated weights for policy 0, policy_version 146962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:28,551][626795] Updated weights for policy 0, policy_version 146972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42462.0, 300 sec: 43376.0). Total num frames: 1204002816. Throughput: 0: 11089.3. Samples: 50976060. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:28,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:30,374][626795] Updated weights for policy 0, policy_version 146982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:31,983][626795] Updated weights for policy 0, policy_version 146992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:33,690][626795] Updated weights for policy 0, policy_version 147002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:33,975][24592] Fps is (10 sec: 47513.1, 60 sec: 44701.4, 300 sec: 43487.0). Total num frames: 1204248576. Throughput: 0: 11111.2. Samples: 51048210. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:33,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:35,534][626795] Updated weights for policy 0, policy_version 147012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:37,211][626795] Updated weights for policy 0, policy_version 147022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:38,962][626795] Updated weights for policy 0, policy_version 147032 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:38,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45056.9, 300 sec: 43487.3). Total num frames: 1204486144. Throughput: 0: 11042.4. Samples: 51117702. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:38,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:40,835][626795] Updated weights for policy 0, policy_version 147042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:45,184][24592] Fps is (10 sec: 35083.1, 60 sec: 42962.1, 300 sec: 43033.1). Total num frames: 1204641792. Throughput: 0: 10739.6. Samples: 51152322. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:45,186][24592] Avg episode reward: [(0, '4.242')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:45,322][626795] Updated weights for policy 0, policy_version 147052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:47,128][626795] Updated weights for policy 0, policy_version 147062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:48,942][626795] Updated weights for policy 0, policy_version 147072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:48,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42735.0, 300 sec: 42987.2). Total num frames: 1204813824. Throughput: 0: 10290.1. Samples: 51189342. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:48,976][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:50,910][626795] Updated weights for policy 0, policy_version 147082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:52,640][626795] Updated weights for policy 0, policy_version 147092 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:53,976][24592] Fps is (10 sec: 44724.1, 60 sec: 42461.7, 300 sec: 42903.8). Total num frames: 1205035008. Throughput: 0: 11335.8. Samples: 51256128. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:53,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:54,434][626795] Updated weights for policy 0, policy_version 147102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:56,268][626795] Updated weights for policy 0, policy_version 147112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:58,094][626795] Updated weights for policy 0, policy_version 147122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:58,975][24592] Fps is (10 sec: 44236.8, 60 sec: 42052.6, 300 sec: 42848.3). Total num frames: 1205256192. Throughput: 0: 10896.9. Samples: 51290544. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:40:58,976][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:40:59,818][626795] Updated weights for policy 0, policy_version 147132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:01,660][626795] Updated weights for policy 0, policy_version 147142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:03,324][626795] Updated weights for policy 0, policy_version 147152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:03,975][24592] Fps is (10 sec: 45876.2, 60 sec: 42189.0, 300 sec: 43320.4). Total num frames: 1205493760. Throughput: 0: 10920.0. Samples: 51359532. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:03,977][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000147155_1205493760.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000145895_1195171840.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:05,183][626795] Updated weights for policy 0, policy_version 147162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:07,039][626795] Updated weights for policy 0, policy_version 147172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:08,760][626795] Updated weights for policy 0, policy_version 147182 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:08,976][24592] Fps is (10 sec: 46693.0, 60 sec: 44120.1, 300 sec: 43320.4). Total num frames: 1205723136. Throughput: 0: 10838.2. Samples: 51427746. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:08,978][24592] Avg episode reward: [(0, '4.291')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:10,613][626795] Updated weights for policy 0, policy_version 147192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:12,349][626795] Updated weights for policy 0, policy_version 147202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:13,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44236.8, 300 sec: 43292.6). Total num frames: 1205952512. Throughput: 0: 10802.8. Samples: 51462186. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:13,977][24592] Avg episode reward: [(0, '4.282')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:14,143][626795] Updated weights for policy 0, policy_version 147212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:15,952][626795] Updated weights for policy 0, policy_version 147222 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:19,321][24592] Fps is (10 sec: 38008.1, 60 sec: 42897.2, 300 sec: 42992.3). Total num frames: 1206116352. Throughput: 0: 9894.4. Samples: 51496878. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:19,323][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:19,657][626795] Updated weights for policy 0, policy_version 147232 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:21,306][626795] Updated weights for policy 0, policy_version 147242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:23,189][626795] Updated weights for policy 0, policy_version 147252 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:23,975][24592] Fps is (10 sec: 36864.0, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1206321152. Throughput: 0: 10227.2. Samples: 51577926. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:23,977][24592] Avg episode reward: [(0, '4.364')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:25,028][626795] Updated weights for policy 0, policy_version 147262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:26,789][626795] Updated weights for policy 0, policy_version 147272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:28,722][626795] Updated weights for policy 0, policy_version 147282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:28,975][24592] Fps is (10 sec: 44124.5, 60 sec: 42325.3, 300 sec: 42876.1). Total num frames: 1206542336. Throughput: 0: 10494.2. Samples: 51611880. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:28,976][24592] Avg episode reward: [(0, '4.307')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:30,436][626795] Updated weights for policy 0, policy_version 147292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:32,198][626795] Updated weights for policy 0, policy_version 147302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:33,922][626795] Updated weights for policy 0, policy_version 147312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:33,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42188.9, 300 sec: 42876.1). Total num frames: 1206779904. Throughput: 0: 10905.7. Samples: 51680100. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:33,976][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:35,782][626795] Updated weights for policy 0, policy_version 147322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:37,550][626795] Updated weights for policy 0, policy_version 147332 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:38,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42052.3, 300 sec: 43320.4). Total num frames: 1207009280. Throughput: 0: 10960.6. Samples: 51749352. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:38,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:39,328][626795] Updated weights for policy 0, policy_version 147342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:41,080][626795] Updated weights for policy 0, policy_version 147352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:42,824][626795] Updated weights for policy 0, policy_version 147362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:43,976][24592] Fps is (10 sec: 45870.3, 60 sec: 44169.7, 300 sec: 43348.1). Total num frames: 1207238656. Throughput: 0: 10961.7. Samples: 51783834. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:43,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:44,652][626795] Updated weights for policy 0, policy_version 147372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:46,450][626795] Updated weights for policy 0, policy_version 147382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:48,205][626795] Updated weights for policy 0, policy_version 147392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:48,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44236.8, 300 sec: 43292.6). Total num frames: 1207468032. Throughput: 0: 10985.6. Samples: 51853884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:48,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:49,885][626795] Updated weights for policy 0, policy_version 147402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:53,881][626795] Updated weights for policy 0, policy_version 147412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:53,976][24592] Fps is (10 sec: 36866.5, 60 sec: 42871.4, 300 sec: 42959.4). Total num frames: 1207607296. Throughput: 0: 10495.4. Samples: 51900042. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:53,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:55,529][626795] Updated weights for policy 0, policy_version 147422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:57,336][626795] Updated weights for policy 0, policy_version 147432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:58,975][24592] Fps is (10 sec: 36044.7, 60 sec: 42871.4, 300 sec: 42903.9). Total num frames: 1207828480. Throughput: 0: 10460.7. Samples: 51932916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:41:58,976][24592] Avg episode reward: [(0, '4.292')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:41:59,173][626795] Updated weights for policy 0, policy_version 147442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:00,976][626795] Updated weights for policy 0, policy_version 147452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:02,683][626795] Updated weights for policy 0, policy_version 147462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:03,977][24592] Fps is (10 sec: 45049.1, 60 sec: 42733.6, 300 sec: 42875.8). Total num frames: 1208057856. Throughput: 0: 11314.1. Samples: 52002120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:03,978][24592] Avg episode reward: [(0, '4.346')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:04,526][626795] Updated weights for policy 0, policy_version 147472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:06,264][626795] Updated weights for policy 0, policy_version 147482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:08,044][626795] Updated weights for policy 0, policy_version 147492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:08,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42735.2, 300 sec: 42876.1). Total num frames: 1208287232. Throughput: 0: 10963.1. Samples: 52071264. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:08,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:09,786][626795] Updated weights for policy 0, policy_version 147502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:11,538][626795] Updated weights for policy 0, policy_version 147512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:13,355][626795] Updated weights for policy 0, policy_version 147522 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:13,975][24592] Fps is (10 sec: 46703.0, 60 sec: 42871.4, 300 sec: 43348.2). Total num frames: 1208524800. Throughput: 0: 10993.1. Samples: 52106568. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:13,977][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:15,173][626795] Updated weights for policy 0, policy_version 147532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:16,818][626795] Updated weights for policy 0, policy_version 147542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:18,665][626795] Updated weights for policy 0, policy_version 147552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:18,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44356.0, 300 sec: 43376.0). Total num frames: 1208762368. Throughput: 0: 11011.2. Samples: 52175604. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:18,977][24592] Avg episode reward: [(0, '4.412')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:20,359][626795] Updated weights for policy 0, policy_version 147562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:22,155][626795] Updated weights for policy 0, policy_version 147572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:23,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44373.3, 300 sec: 43348.2). Total num frames: 1208983552. Throughput: 0: 11010.4. Samples: 52244820. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:23,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:23,998][626795] Updated weights for policy 0, policy_version 147582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:25,688][626795] Updated weights for policy 0, policy_version 147592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:28,975][24592] Fps is (10 sec: 35225.6, 60 sec: 42871.5, 300 sec: 42987.2). Total num frames: 1209114624. Throughput: 0: 10859.6. Samples: 52272504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:28,977][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:29,731][626795] Updated weights for policy 0, policy_version 147602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:31,485][626795] Updated weights for policy 0, policy_version 147612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:33,269][626795] Updated weights for policy 0, policy_version 147622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:33,976][24592] Fps is (10 sec: 36862.6, 60 sec: 42871.1, 300 sec: 42987.2). Total num frames: 1209352192. Throughput: 0: 10440.0. Samples: 52323690. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:33,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:34,922][626795] Updated weights for policy 0, policy_version 147632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:36,828][626795] Updated weights for policy 0, policy_version 147642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:38,544][626795] Updated weights for policy 0, policy_version 147652 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:38,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42871.5, 300 sec: 42931.6). Total num frames: 1209581568. Throughput: 0: 10956.1. Samples: 52393062. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:38,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:40,245][626795] Updated weights for policy 0, policy_version 147662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:42,070][626795] Updated weights for policy 0, policy_version 147672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:43,880][626795] Updated weights for policy 0, policy_version 147682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:43,976][24592] Fps is (10 sec: 45874.4, 60 sec: 42871.8, 300 sec: 43307.6). Total num frames: 1209810944. Throughput: 0: 10999.2. Samples: 52427886. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:43,977][24592] Avg episode reward: [(0, '4.181')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:45,579][626795] Updated weights for policy 0, policy_version 147692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:47,393][626795] Updated weights for policy 0, policy_version 147702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:48,975][24592] Fps is (10 sec: 45874.6, 60 sec: 42871.4, 300 sec: 43348.2). Total num frames: 1210040320. Throughput: 0: 11001.2. Samples: 52497156. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:48,977][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:49,181][626795] Updated weights for policy 0, policy_version 147712 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:50,986][626795] Updated weights for policy 0, policy_version 147722 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:52,738][626795] Updated weights for policy 0, policy_version 147732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:53,975][24592] Fps is (10 sec: 45878.0, 60 sec: 44373.6, 300 sec: 43376.1). Total num frames: 1210269696. Throughput: 0: 10999.7. Samples: 52566252. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:53,977][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:54,510][626795] Updated weights for policy 0, policy_version 147742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:56,263][626795] Updated weights for policy 0, policy_version 147752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:58,080][626795] Updated weights for policy 0, policy_version 147762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:58,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44646.4, 300 sec: 43403.7). Total num frames: 1210507264. Throughput: 0: 10989.6. Samples: 52601100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:42:58,977][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:42:59,740][626795] Updated weights for policy 0, policy_version 147772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:03,717][626795] Updated weights for policy 0, policy_version 147782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:03,975][24592] Fps is (10 sec: 36864.1, 60 sec: 43009.4, 300 sec: 43070.6). Total num frames: 1210638336. Throughput: 0: 10439.3. Samples: 52645374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:03,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000147783_1210638336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000146518_1200275456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:05,581][626795] Updated weights for policy 0, policy_version 147792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:07,277][626795] Updated weights for policy 0, policy_version 147802 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:08,975][24592] Fps is (10 sec: 36045.2, 60 sec: 43008.0, 300 sec: 43070.5). Total num frames: 1210867712. Throughput: 0: 10443.9. Samples: 52714794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:08,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:09,117][626795] Updated weights for policy 0, policy_version 147812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:10,820][626795] Updated weights for policy 0, policy_version 147822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:12,718][626795] Updated weights for policy 0, policy_version 147832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:13,976][24592] Fps is (10 sec: 45873.3, 60 sec: 42871.2, 300 sec: 43070.4). Total num frames: 1211097088. Throughput: 0: 10580.7. Samples: 52748640. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:13,979][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:14,544][626795] Updated weights for policy 0, policy_version 147842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:16,278][626795] Updated weights for policy 0, policy_version 147852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:17,980][626795] Updated weights for policy 0, policy_version 147862 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:18,975][24592] Fps is (10 sec: 45875.0, 60 sec: 42734.9, 300 sec: 43355.1). Total num frames: 1211326464. Throughput: 0: 10979.3. Samples: 52817754. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:18,977][24592] Avg episode reward: [(0, '4.342')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:19,813][626795] Updated weights for policy 0, policy_version 147872 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:21,491][626795] Updated weights for policy 0, policy_version 147882 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:23,266][626795] Updated weights for policy 0, policy_version 147892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:23,979][24592] Fps is (10 sec: 45863.0, 60 sec: 42869.3, 300 sec: 43347.7). Total num frames: 1211555840. Throughput: 0: 10993.7. Samples: 52887810. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:23,980][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:25,085][626795] Updated weights for policy 0, policy_version 147902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:26,808][626795] Updated weights for policy 0, policy_version 147912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:28,536][626795] Updated weights for policy 0, policy_version 147922 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:28,981][24592] Fps is (10 sec: 45850.6, 60 sec: 44505.9, 300 sec: 43347.4). Total num frames: 1211785216. Throughput: 0: 10983.0. Samples: 52922172. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:28,982][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:30,385][626795] Updated weights for policy 0, policy_version 147932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:32,108][626795] Updated weights for policy 0, policy_version 147942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:33,930][626795] Updated weights for policy 0, policy_version 147952 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:33,976][24592] Fps is (10 sec: 46706.5, 60 sec: 44509.9, 300 sec: 43375.9). Total num frames: 1212022784. Throughput: 0: 11003.7. Samples: 52992324. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:33,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:37,827][626795] Updated weights for policy 0, policy_version 147962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:38,975][24592] Fps is (10 sec: 36883.8, 60 sec: 42871.5, 300 sec: 43015.0). Total num frames: 1212153856. Throughput: 0: 10438.3. Samples: 53035974. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:38,977][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:39,628][626795] Updated weights for policy 0, policy_version 147972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:41,298][626795] Updated weights for policy 0, policy_version 147982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:43,206][626795] Updated weights for policy 0, policy_version 147992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:43,976][24592] Fps is (10 sec: 36044.4, 60 sec: 42871.5, 300 sec: 42987.2). Total num frames: 1212383232. Throughput: 0: 10438.7. Samples: 53070846. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:43,978][24592] Avg episode reward: [(0, '4.242')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:44,875][626795] Updated weights for policy 0, policy_version 148002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:46,705][626795] Updated weights for policy 0, policy_version 148012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:48,407][626795] Updated weights for policy 0, policy_version 148022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:48,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43008.1, 300 sec: 42987.2). Total num frames: 1212620800. Throughput: 0: 11003.4. Samples: 53140530. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:48,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:50,206][626795] Updated weights for policy 0, policy_version 148032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:51,944][626795] Updated weights for policy 0, policy_version 148042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:53,697][626795] Updated weights for policy 0, policy_version 148052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:53,975][24592] Fps is (10 sec: 46697.1, 60 sec: 43008.0, 300 sec: 43390.2). Total num frames: 1212850176. Throughput: 0: 11012.8. Samples: 53210370. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:53,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:55,510][626795] Updated weights for policy 0, policy_version 148062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:57,329][626795] Updated weights for policy 0, policy_version 148072 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:43:58,965][626795] Updated weights for policy 0, policy_version 148082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:58,976][24592] Fps is (10 sec: 46690.5, 60 sec: 43007.4, 300 sec: 43431.4). Total num frames: 1213087744. Throughput: 0: 11031.9. Samples: 53245080. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:43:58,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:00,803][626795] Updated weights for policy 0, policy_version 148092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:02,530][626795] Updated weights for policy 0, policy_version 148102 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:03,975][24592] Fps is (10 sec: 46693.9, 60 sec: 44646.3, 300 sec: 43431.6). Total num frames: 1213317120. Throughput: 0: 11038.3. Samples: 53314476. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:03,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:04,378][626795] Updated weights for policy 0, policy_version 148112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:06,117][626795] Updated weights for policy 0, policy_version 148122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:07,866][626795] Updated weights for policy 0, policy_version 148132 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:08,975][24592] Fps is (10 sec: 45879.0, 60 sec: 44646.3, 300 sec: 43403.7). Total num frames: 1213546496. Throughput: 0: 11031.9. Samples: 53384214. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:08,977][24592] Avg episode reward: [(0, '4.311')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:11,832][626795] Updated weights for policy 0, policy_version 148142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:13,533][626795] Updated weights for policy 0, policy_version 148152 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:13,975][24592] Fps is (10 sec: 36044.8, 60 sec: 43008.2, 300 sec: 43014.9). Total num frames: 1213677568. Throughput: 0: 10477.1. Samples: 53393586. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:13,976][24592] Avg episode reward: [(0, '4.300')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:15,355][626795] Updated weights for policy 0, policy_version 148162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:17,171][626795] Updated weights for policy 0, policy_version 148172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:18,917][626795] Updated weights for policy 0, policy_version 148182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:18,976][24592] Fps is (10 sec: 36043.6, 60 sec: 43007.7, 300 sec: 42987.3). Total num frames: 1213906944. Throughput: 0: 10451.7. Samples: 53462652. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:18,977][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:20,624][626795] Updated weights for policy 0, policy_version 148192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:22,431][626795] Updated weights for policy 0, policy_version 148202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:23,975][24592] Fps is (10 sec: 46694.5, 60 sec: 43146.7, 300 sec: 43015.0). Total num frames: 1214144512. Throughput: 0: 11037.5. Samples: 53532660. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:23,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:24,286][626795] Updated weights for policy 0, policy_version 148212 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:25,929][626795] Updated weights for policy 0, policy_version 148222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:27,712][626795] Updated weights for policy 0, policy_version 148232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:28,976][24592] Fps is (10 sec: 45876.5, 60 sec: 43011.8, 300 sec: 43380.2). Total num frames: 1214365696. Throughput: 0: 11027.0. Samples: 53567058. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:28,979][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:29,464][626795] Updated weights for policy 0, policy_version 148242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:31,216][626795] Updated weights for policy 0, policy_version 148252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:33,072][626795] Updated weights for policy 0, policy_version 148262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:33,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43008.3, 300 sec: 43459.4). Total num frames: 1214603264. Throughput: 0: 11022.1. Samples: 53636526. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:33,977][24592] Avg episode reward: [(0, '4.142')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:34,883][626795] Updated weights for policy 0, policy_version 148272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:36,517][626795] Updated weights for policy 0, policy_version 148282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:38,345][626795] Updated weights for policy 0, policy_version 148292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:38,975][24592] Fps is (10 sec: 46695.0, 60 sec: 44646.4, 300 sec: 43459.3). Total num frames: 1214832640. Throughput: 0: 11019.3. Samples: 53706240. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:38,976][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:40,156][626795] Updated weights for policy 0, policy_version 148302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:41,927][626795] Updated weights for policy 0, policy_version 148312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:43,665][626795] Updated weights for policy 0, policy_version 148322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:45,929][24592] Fps is (10 sec: 37691.6, 60 sec: 43106.5, 300 sec: 43118.1). Total num frames: 1215053824. Throughput: 0: 10551.9. Samples: 53740524. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:45,930][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:47,660][626795] Updated weights for policy 0, policy_version 148332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:48,975][24592] Fps is (10 sec: 36044.9, 60 sec: 42871.5, 300 sec: 43070.5). Total num frames: 1215193088. Throughput: 0: 10446.0. Samples: 53784546. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:48,976][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:49,366][626795] Updated weights for policy 0, policy_version 148342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:51,134][626795] Updated weights for policy 0, policy_version 148352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:52,939][626795] Updated weights for policy 0, policy_version 148362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:53,976][24592] Fps is (10 sec: 45813.7, 60 sec: 42871.2, 300 sec: 43014.9). Total num frames: 1215422464. Throughput: 0: 10440.5. Samples: 53854038. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:53,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:54,862][626795] Updated weights for policy 0, policy_version 148372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:56,461][626795] Updated weights for policy 0, policy_version 148382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:44:58,279][626795] Updated weights for policy 0, policy_version 148392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:58,975][24592] Fps is (10 sec: 45874.7, 60 sec: 42735.5, 300 sec: 43015.0). Total num frames: 1215651840. Throughput: 0: 10998.4. Samples: 53888514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:44:58,976][24592] Avg episode reward: [(0, '4.337')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:00,043][626795] Updated weights for policy 0, policy_version 148402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:01,790][626795] Updated weights for policy 0, policy_version 148412 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:03,549][626795] Updated weights for policy 0, policy_version 148422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:03,975][24592] Fps is (10 sec: 46696.3, 60 sec: 42871.5, 300 sec: 43432.5). Total num frames: 1215889408. Throughput: 0: 11013.2. Samples: 53958240. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:03,977][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000148424_1215889408.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:04,050][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000147155_1205493760.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:05,417][626795] Updated weights for policy 0, policy_version 148432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:07,099][626795] Updated weights for policy 0, policy_version 148442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:08,891][626795] Updated weights for policy 0, policy_version 148452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:08,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42871.5, 300 sec: 43459.3). Total num frames: 1216118784. Throughput: 0: 11001.2. Samples: 54027714. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:08,976][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:10,640][626795] Updated weights for policy 0, policy_version 148462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:12,403][626795] Updated weights for policy 0, policy_version 148472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:13,975][24592] Fps is (10 sec: 45874.7, 60 sec: 44509.9, 300 sec: 43459.2). Total num frames: 1216348160. Throughput: 0: 11000.7. Samples: 54062088. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:13,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:14,203][626795] Updated weights for policy 0, policy_version 148482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:16,014][626795] Updated weights for policy 0, policy_version 148492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:17,741][626795] Updated weights for policy 0, policy_version 148502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:20,377][24592] Fps is (10 sec: 37362.0, 60 sec: 42960.5, 300 sec: 43087.9). Total num frames: 1216544768. Throughput: 0: 10676.4. Samples: 54131928. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:20,378][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:21,729][626795] Updated weights for policy 0, policy_version 148512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:23,539][626795] Updated weights for policy 0, policy_version 148522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:23,976][24592] Fps is (10 sec: 36862.2, 60 sec: 42871.1, 300 sec: 43098.2). Total num frames: 1216716800. Throughput: 0: 10435.9. Samples: 54175860. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:23,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:25,204][626795] Updated weights for policy 0, policy_version 148532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:26,983][626795] Updated weights for policy 0, policy_version 148542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:28,767][626795] Updated weights for policy 0, policy_version 148552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:28,975][24592] Fps is (10 sec: 46683.1, 60 sec: 43008.0, 300 sec: 43042.7). Total num frames: 1216946176. Throughput: 0: 10928.2. Samples: 54210942. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:28,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:30,500][626795] Updated weights for policy 0, policy_version 148562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:32,270][626795] Updated weights for policy 0, policy_version 148572 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:33,976][24592] Fps is (10 sec: 45876.1, 60 sec: 42871.2, 300 sec: 43014.9). Total num frames: 1217175552. Throughput: 0: 11016.0. Samples: 54280272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:33,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:34,094][626795] Updated weights for policy 0, policy_version 148582 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:35,893][626795] Updated weights for policy 0, policy_version 148592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:37,617][626795] Updated weights for policy 0, policy_version 148602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,097][626772] Signal inference workers to stop experience collection... (800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,098][626772] Signal inference workers to resume experience collection... (800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,108][626795] InferenceWorker_p0-w0: stopping experience collection (800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,108][626795] InferenceWorker_p0-w0: resuming experience collection (800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42871.5, 300 sec: 43442.8). Total num frames: 1217404928. Throughput: 0: 11003.3. Samples: 54349182. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:38,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:39,432][626795] Updated weights for policy 0, policy_version 148612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:41,152][626795] Updated weights for policy 0, policy_version 148622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:42,911][626795] Updated weights for policy 0, policy_version 148632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:43,975][24592] Fps is (10 sec: 45876.7, 60 sec: 44455.7, 300 sec: 43459.2). Total num frames: 1217634304. Throughput: 0: 11010.9. Samples: 54384006. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:43,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:44,711][626795] Updated weights for policy 0, policy_version 148642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:46,391][626795] Updated weights for policy 0, policy_version 148652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:48,187][626795] Updated weights for policy 0, policy_version 148662 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:48,976][24592] Fps is (10 sec: 46690.8, 60 sec: 44645.8, 300 sec: 43514.7). Total num frames: 1217871872. Throughput: 0: 11013.4. Samples: 54453852. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:48,980][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:50,014][626795] Updated weights for policy 0, policy_version 148672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:51,892][626795] Updated weights for policy 0, policy_version 148682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:54,829][24592] Fps is (10 sec: 36982.1, 60 sec: 42943.1, 300 sec: 43195.3). Total num frames: 1218035712. Throughput: 0: 10538.1. Samples: 54510930. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:54,831][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:55,931][626795] Updated weights for policy 0, policy_version 148692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:57,585][626795] Updated weights for policy 0, policy_version 148702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:58,975][24592] Fps is (10 sec: 36047.6, 60 sec: 43008.0, 300 sec: 43181.6). Total num frames: 1218232320. Throughput: 0: 10426.2. Samples: 54531264. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:45:58,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:45:59,340][626795] Updated weights for policy 0, policy_version 148712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:01,199][626795] Updated weights for policy 0, policy_version 148722 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:02,924][626795] Updated weights for policy 0, policy_version 148732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:03,976][24592] Fps is (10 sec: 46575.8, 60 sec: 42871.3, 300 sec: 43181.6). Total num frames: 1218461696. Throughput: 0: 10744.6. Samples: 54600378. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:03,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:04,718][626795] Updated weights for policy 0, policy_version 148742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:06,598][626795] Updated weights for policy 0, policy_version 148752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:08,281][626795] Updated weights for policy 0, policy_version 148762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:08,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42871.5, 300 sec: 43181.6). Total num frames: 1218691072. Throughput: 0: 10966.9. Samples: 54669366. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:08,976][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:10,056][626795] Updated weights for policy 0, policy_version 148772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:11,793][626795] Updated weights for policy 0, policy_version 148782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:13,520][626795] Updated weights for policy 0, policy_version 148792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:13,975][24592] Fps is (10 sec: 45876.1, 60 sec: 42871.5, 300 sec: 43454.7). Total num frames: 1218920448. Throughput: 0: 10965.5. Samples: 54704388. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:13,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:15,376][626795] Updated weights for policy 0, policy_version 148802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:17,151][626795] Updated weights for policy 0, policy_version 148812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:18,855][626795] Updated weights for policy 0, policy_version 148822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:18,976][24592] Fps is (10 sec: 45873.3, 60 sec: 44455.7, 300 sec: 43487.0). Total num frames: 1219149824. Throughput: 0: 10971.6. Samples: 54773994. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:18,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:20,605][626795] Updated weights for policy 0, policy_version 148832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:22,396][626795] Updated weights for policy 0, policy_version 148842 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:23,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44510.3, 300 sec: 43542.6). Total num frames: 1219387392. Throughput: 0: 10994.9. Samples: 54843954. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:23,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:24,247][626795] Updated weights for policy 0, policy_version 148852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:25,932][626795] Updated weights for policy 0, policy_version 148862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:29,280][24592] Fps is (10 sec: 36569.1, 60 sec: 42790.5, 300 sec: 43164.7). Total num frames: 1219526656. Throughput: 0: 10903.0. Samples: 54877968. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:29,283][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:29,944][626795] Updated weights for policy 0, policy_version 148872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:31,772][626795] Updated weights for policy 0, policy_version 148882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:33,413][626795] Updated weights for policy 0, policy_version 148892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:33,977][24592] Fps is (10 sec: 35218.7, 60 sec: 42733.8, 300 sec: 43153.5). Total num frames: 1219739648. Throughput: 0: 10398.1. Samples: 54921780. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:33,978][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:35,285][626795] Updated weights for policy 0, policy_version 148902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:36,985][626795] Updated weights for policy 0, policy_version 148912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:38,747][626795] Updated weights for policy 0, policy_version 148922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:38,975][24592] Fps is (10 sec: 46473.5, 60 sec: 42871.4, 300 sec: 43181.7). Total num frames: 1219977216. Throughput: 0: 10890.0. Samples: 54991680. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:38,981][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:40,617][626795] Updated weights for policy 0, policy_version 148932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:42,355][626795] Updated weights for policy 0, policy_version 148942 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:43,975][24592] Fps is (10 sec: 46703.1, 60 sec: 42871.5, 300 sec: 43181.6). Total num frames: 1220206592. Throughput: 0: 10990.1. Samples: 55025820. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:43,976][24592] Avg episode reward: [(0, '4.372')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:44,103][626795] Updated weights for policy 0, policy_version 148952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:45,925][626795] Updated weights for policy 0, policy_version 148962 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:47,638][626795] Updated weights for policy 0, policy_version 148972 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:48,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42735.5, 300 sec: 43487.1). Total num frames: 1220435968. Throughput: 0: 11006.0. Samples: 55095648. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:48,979][24592] Avg episode reward: [(0, '4.285')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:49,379][626795] Updated weights for policy 0, policy_version 148982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:51,139][626795] Updated weights for policy 0, policy_version 148992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:52,973][626795] Updated weights for policy 0, policy_version 149002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:53,975][24592] Fps is (10 sec: 45874.9, 60 sec: 44460.1, 300 sec: 43514.8). Total num frames: 1220665344. Throughput: 0: 11012.1. Samples: 55164912. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:53,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:54,806][626795] Updated weights for policy 0, policy_version 149012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:56,469][626795] Updated weights for policy 0, policy_version 149022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:46:58,235][626795] Updated weights for policy 0, policy_version 149032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44509.9, 300 sec: 43542.8). Total num frames: 1220902912. Throughput: 0: 11008.0. Samples: 55199748. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:46:58,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:00,058][626795] Updated weights for policy 0, policy_version 149042 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:03,975][24592] Fps is (10 sec: 36044.9, 60 sec: 42735.0, 300 sec: 43181.5). Total num frames: 1221025792. Throughput: 0: 10723.9. Samples: 55256568. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:03,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:04,035][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000149052_1221033984.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:04,040][626795] Updated weights for policy 0, policy_version 149052 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:04,086][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000147783_1210638336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:05,786][626795] Updated weights for policy 0, policy_version 149062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:07,559][626795] Updated weights for policy 0, policy_version 149072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:08,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42734.9, 300 sec: 43153.8). Total num frames: 1221255168. Throughput: 0: 10417.3. Samples: 55312734. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:08,977][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:09,335][626795] Updated weights for policy 0, policy_version 149082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:11,023][626795] Updated weights for policy 0, policy_version 149092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:12,877][626795] Updated weights for policy 0, policy_version 149102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:13,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42871.5, 300 sec: 43153.8). Total num frames: 1221492736. Throughput: 0: 10508.6. Samples: 55347648. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:13,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:14,624][626795] Updated weights for policy 0, policy_version 149112 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:16,307][626795] Updated weights for policy 0, policy_version 149122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:18,123][626795] Updated weights for policy 0, policy_version 149132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:18,976][24592] Fps is (10 sec: 46692.3, 60 sec: 42871.4, 300 sec: 43181.5). Total num frames: 1221722112. Throughput: 0: 11013.6. Samples: 55417374. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:18,978][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:19,909][626795] Updated weights for policy 0, policy_version 149142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:21,681][626795] Updated weights for policy 0, policy_version 149152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:23,396][626795] Updated weights for policy 0, policy_version 149162 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:23,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42734.9, 300 sec: 43514.8). Total num frames: 1221951488. Throughput: 0: 11009.2. Samples: 55487094. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:23,977][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:25,203][626795] Updated weights for policy 0, policy_version 149172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:26,980][626795] Updated weights for policy 0, policy_version 149182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:28,755][626795] Updated weights for policy 0, policy_version 149192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:28,975][24592] Fps is (10 sec: 46696.5, 60 sec: 44600.1, 300 sec: 43514.9). Total num frames: 1222189056. Throughput: 0: 11031.3. Samples: 55522230. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:28,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:30,461][626795] Updated weights for policy 0, policy_version 149202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:32,229][626795] Updated weights for policy 0, policy_version 149212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:33,975][24592] Fps is (10 sec: 46694.7, 60 sec: 44647.8, 300 sec: 43514.8). Total num frames: 1222418432. Throughput: 0: 11015.1. Samples: 55591326. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:33,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:34,012][626795] Updated weights for policy 0, policy_version 149222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:35,819][626795] Updated weights for policy 0, policy_version 149232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:38,976][24592] Fps is (10 sec: 36043.4, 60 sec: 42871.2, 300 sec: 43181.6). Total num frames: 1222549504. Throughput: 0: 10443.9. Samples: 55634892. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:38,977][24592] Avg episode reward: [(0, '4.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:39,753][626795] Updated weights for policy 0, policy_version 149242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:41,552][626795] Updated weights for policy 0, policy_version 149252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:43,382][626795] Updated weights for policy 0, policy_version 149262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:43,975][24592] Fps is (10 sec: 36044.7, 60 sec: 42871.5, 300 sec: 43181.6). Total num frames: 1222778880. Throughput: 0: 10441.5. Samples: 55669614. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:43,977][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:45,042][626795] Updated weights for policy 0, policy_version 149272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:46,784][626795] Updated weights for policy 0, policy_version 149282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:48,677][626795] Updated weights for policy 0, policy_version 149292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:48,975][24592] Fps is (10 sec: 46696.0, 60 sec: 43008.0, 300 sec: 43209.3). Total num frames: 1223016448. Throughput: 0: 10714.0. Samples: 55738698. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:48,976][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:50,439][626795] Updated weights for policy 0, policy_version 149302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:52,211][626795] Updated weights for policy 0, policy_version 149312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:53,976][24592] Fps is (10 sec: 46692.4, 60 sec: 43007.8, 300 sec: 43181.5). Total num frames: 1223245824. Throughput: 0: 11030.7. Samples: 55809120. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:53,976][626795] Updated weights for policy 0, policy_version 149322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:53,977][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:55,790][626795] Updated weights for policy 0, policy_version 149332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:57,457][626795] Updated weights for policy 0, policy_version 149342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:58,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42871.5, 300 sec: 43514.8). Total num frames: 1223475200. Throughput: 0: 11017.2. Samples: 55843422. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:47:58,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:47:59,244][626795] Updated weights for policy 0, policy_version 149352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:01,021][626795] Updated weights for policy 0, policy_version 149362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:02,774][626795] Updated weights for policy 0, policy_version 149372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:03,976][24592] Fps is (10 sec: 45876.2, 60 sec: 44646.3, 300 sec: 43514.8). Total num frames: 1223704576. Throughput: 0: 11012.7. Samples: 55912944. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:03,977][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:04,590][626795] Updated weights for policy 0, policy_version 149382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:06,355][626795] Updated weights for policy 0, policy_version 149392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:08,090][626795] Updated weights for policy 0, policy_version 149402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:08,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44782.9, 300 sec: 43542.6). Total num frames: 1223942144. Throughput: 0: 11009.5. Samples: 55982520. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:08,977][24592] Avg episode reward: [(0, '4.362')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:09,924][626795] Updated weights for policy 0, policy_version 149412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:13,861][626795] Updated weights for policy 0, policy_version 149422 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:13,975][24592] Fps is (10 sec: 36045.4, 60 sec: 42871.4, 300 sec: 43181.6). Total num frames: 1224065024. Throughput: 0: 10693.3. Samples: 56003430. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:13,976][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:15,616][626795] Updated weights for policy 0, policy_version 149432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:17,370][626795] Updated weights for policy 0, policy_version 149442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:18,975][24592] Fps is (10 sec: 36044.8, 60 sec: 43008.3, 300 sec: 43209.8). Total num frames: 1224302592. Throughput: 0: 10442.1. Samples: 56061222. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:18,976][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:19,180][626795] Updated weights for policy 0, policy_version 149452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:20,921][626795] Updated weights for policy 0, policy_version 149462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:22,697][626795] Updated weights for policy 0, policy_version 149472 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:23,975][24592] Fps is (10 sec: 46694.5, 60 sec: 43008.0, 300 sec: 43210.1). Total num frames: 1224531968. Throughput: 0: 11025.0. Samples: 56131014. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:23,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:24,380][626795] Updated weights for policy 0, policy_version 149482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:26,140][626795] Updated weights for policy 0, policy_version 149492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:27,877][626795] Updated weights for policy 0, policy_version 149502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:28,975][24592] Fps is (10 sec: 45875.0, 60 sec: 42871.4, 300 sec: 43181.6). Total num frames: 1224761344. Throughput: 0: 11036.0. Samples: 56166234. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:28,976][24592] Avg episode reward: [(0, '4.305')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:29,723][626795] Updated weights for policy 0, policy_version 149512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:31,478][626795] Updated weights for policy 0, policy_version 149522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:33,175][626795] Updated weights for policy 0, policy_version 149532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:33,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43007.9, 300 sec: 43542.6). Total num frames: 1224998912. Throughput: 0: 11054.9. Samples: 56236170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:33,977][24592] Avg episode reward: [(0, '4.251')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:34,894][626795] Updated weights for policy 0, policy_version 149542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:36,483][626795] Updated weights for policy 0, policy_version 149552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:38,250][626795] Updated weights for policy 0, policy_version 149562 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:38,975][24592] Fps is (10 sec: 48333.2, 60 sec: 44919.8, 300 sec: 43598.2). Total num frames: 1225244672. Throughput: 0: 11090.1. Samples: 56308170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:38,977][24592] Avg episode reward: [(0, '4.303')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:39,932][626795] Updated weights for policy 0, policy_version 149572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:41,676][626795] Updated weights for policy 0, policy_version 149582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:43,431][626795] Updated weights for policy 0, policy_version 149592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:43,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45056.0, 300 sec: 43598.1). Total num frames: 1225482240. Throughput: 0: 11133.9. Samples: 56344446. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:43,978][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:47,792][626795] Updated weights for policy 0, policy_version 149602 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:48,975][24592] Fps is (10 sec: 34406.2, 60 sec: 42871.5, 300 sec: 43181.6). Total num frames: 1225588736. Throughput: 0: 10470.2. Samples: 56384100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:48,976][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:49,599][626795] Updated weights for policy 0, policy_version 149612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:51,182][626795] Updated weights for policy 0, policy_version 149622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:53,020][626795] Updated weights for policy 0, policy_version 149632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:53,975][24592] Fps is (10 sec: 34406.5, 60 sec: 43008.3, 300 sec: 43181.7). Total num frames: 1225826304. Throughput: 0: 10467.7. Samples: 56453568. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:53,978][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:54,784][626795] Updated weights for policy 0, policy_version 149642 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:56,638][626795] Updated weights for policy 0, policy_version 149652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:48:58,321][626795] Updated weights for policy 0, policy_version 149662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:58,975][24592] Fps is (10 sec: 46694.5, 60 sec: 43008.0, 300 sec: 43181.6). Total num frames: 1226055680. Throughput: 0: 10772.5. Samples: 56488194. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:48:58,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:00,166][626795] Updated weights for policy 0, policy_version 149672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:01,885][626795] Updated weights for policy 0, policy_version 149682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:03,659][626795] Updated weights for policy 0, policy_version 149692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:03,975][24592] Fps is (10 sec: 46693.9, 60 sec: 43144.6, 300 sec: 43209.3). Total num frames: 1226293248. Throughput: 0: 11035.8. Samples: 56557836. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:03,976][24592] Avg episode reward: [(0, '4.335')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000149694_1226293248.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000148424_1215889408.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:05,453][626795] Updated weights for policy 0, policy_version 149702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:07,211][626795] Updated weights for policy 0, policy_version 149712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:08,895][626795] Updated weights for policy 0, policy_version 149722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:08,975][24592] Fps is (10 sec: 46693.7, 60 sec: 43007.9, 300 sec: 43542.6). Total num frames: 1226522624. Throughput: 0: 11057.3. Samples: 56628594. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:08,979][24592] Avg episode reward: [(0, '4.339')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:10,644][626795] Updated weights for policy 0, policy_version 149732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:12,327][626795] Updated weights for policy 0, policy_version 149742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:13,976][24592] Fps is (10 sec: 46691.8, 60 sec: 44919.0, 300 sec: 43570.3). Total num frames: 1226760192. Throughput: 0: 11054.1. Samples: 56663676. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:13,978][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:14,239][626795] Updated weights for policy 0, policy_version 149752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:15,832][626795] Updated weights for policy 0, policy_version 149762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:17,628][626795] Updated weights for policy 0, policy_version 149772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:18,975][24592] Fps is (10 sec: 46695.2, 60 sec: 44782.9, 300 sec: 43542.6). Total num frames: 1226989568. Throughput: 0: 11045.6. Samples: 56733222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:18,976][24592] Avg episode reward: [(0, '4.300')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:19,452][626795] Updated weights for policy 0, policy_version 149782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:23,437][626795] Updated weights for policy 0, policy_version 149792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:23,975][24592] Fps is (10 sec: 36047.0, 60 sec: 43144.5, 300 sec: 43237.1). Total num frames: 1227120640. Throughput: 0: 10421.8. Samples: 56777154. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:23,976][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:25,281][626795] Updated weights for policy 0, policy_version 149802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:27,020][626795] Updated weights for policy 0, policy_version 149812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:28,711][626795] Updated weights for policy 0, policy_version 149822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:28,976][24592] Fps is (10 sec: 36043.8, 60 sec: 43144.4, 300 sec: 43209.3). Total num frames: 1227350016. Throughput: 0: 10381.5. Samples: 56811618. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:28,976][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:30,523][626795] Updated weights for policy 0, policy_version 149832 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:32,266][626795] Updated weights for policy 0, policy_version 149842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:33,929][626795] Updated weights for policy 0, policy_version 149852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:33,975][24592] Fps is (10 sec: 46694.6, 60 sec: 43144.6, 300 sec: 43237.1). Total num frames: 1227587584. Throughput: 0: 11066.0. Samples: 56882070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:33,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:35,745][626795] Updated weights for policy 0, policy_version 149862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:37,532][626795] Updated weights for policy 0, policy_version 149872 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:38,975][24592] Fps is (10 sec: 46695.6, 60 sec: 42871.5, 300 sec: 43553.3). Total num frames: 1227816960. Throughput: 0: 11079.7. Samples: 56952156. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:38,976][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:39,272][626795] Updated weights for policy 0, policy_version 149882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:41,124][626795] Updated weights for policy 0, policy_version 149892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:42,780][626795] Updated weights for policy 0, policy_version 149902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42734.9, 300 sec: 43570.3). Total num frames: 1228046336. Throughput: 0: 11075.5. Samples: 56986590. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:43,977][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:44,571][626795] Updated weights for policy 0, policy_version 149912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:46,288][626795] Updated weights for policy 0, policy_version 149922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:48,051][626795] Updated weights for policy 0, policy_version 149932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:48,976][24592] Fps is (10 sec: 45874.3, 60 sec: 44782.8, 300 sec: 43570.4). Total num frames: 1228275712. Throughput: 0: 11090.1. Samples: 57056892. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:48,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:49,776][626795] Updated weights for policy 0, policy_version 149942 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:51,600][626795] Updated weights for policy 0, policy_version 149952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:53,403][626795] Updated weights for policy 0, policy_version 149962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:53,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44646.4, 300 sec: 43570.3). Total num frames: 1228505088. Throughput: 0: 11052.3. Samples: 57125946. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:53,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:57,408][626795] Updated weights for policy 0, policy_version 149972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:58,975][24592] Fps is (10 sec: 36045.6, 60 sec: 43008.0, 300 sec: 43209.3). Total num frames: 1228636160. Throughput: 0: 10455.8. Samples: 57134178. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:49:58,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:49:59,183][626795] Updated weights for policy 0, policy_version 149982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:00,881][626795] Updated weights for policy 0, policy_version 149992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:02,688][626795] Updated weights for policy 0, policy_version 150002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:03,975][24592] Fps is (10 sec: 36044.9, 60 sec: 42871.5, 300 sec: 43209.3). Total num frames: 1228865536. Throughput: 0: 10468.0. Samples: 57204282. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:03,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:04,514][626795] Updated weights for policy 0, policy_version 150012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:06,163][626795] Updated weights for policy 0, policy_version 150022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:08,055][626795] Updated weights for policy 0, policy_version 150032 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:08,975][24592] Fps is (10 sec: 46694.4, 60 sec: 43008.1, 300 sec: 43237.1). Total num frames: 1229103104. Throughput: 0: 11029.6. Samples: 57273486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:08,976][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:09,728][626795] Updated weights for policy 0, policy_version 150042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:11,519][626795] Updated weights for policy 0, policy_version 150052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:13,279][626795] Updated weights for policy 0, policy_version 150062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:13,975][24592] Fps is (10 sec: 47513.3, 60 sec: 43008.4, 300 sec: 43583.0). Total num frames: 1229340672. Throughput: 0: 11049.4. Samples: 57308838. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:13,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:15,106][626795] Updated weights for policy 0, policy_version 150072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:16,702][626795] Updated weights for policy 0, policy_version 150082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:18,513][626795] Updated weights for policy 0, policy_version 150092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:18,976][24592] Fps is (10 sec: 46691.3, 60 sec: 43007.5, 300 sec: 43570.3). Total num frames: 1229570048. Throughput: 0: 11036.8. Samples: 57378732. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:18,978][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:20,330][626795] Updated weights for policy 0, policy_version 150102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:21,983][626795] Updated weights for policy 0, policy_version 150112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:23,784][626795] Updated weights for policy 0, policy_version 150122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:23,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44783.0, 300 sec: 43598.1). Total num frames: 1229807616. Throughput: 0: 11040.4. Samples: 57448974. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:23,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:25,623][626795] Updated weights for policy 0, policy_version 150132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:27,239][626795] Updated weights for policy 0, policy_version 150142 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:31,109][24592] Fps is (10 sec: 37811.0, 60 sec: 43113.6, 300 sec: 43257.6). Total num frames: 1230028800. Throughput: 0: 10551.1. Samples: 57483900. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:31,110][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:31,351][626795] Updated weights for policy 0, policy_version 150152 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:33,124][626795] Updated weights for policy 0, policy_version 150162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:33,975][24592] Fps is (10 sec: 35225.6, 60 sec: 42871.5, 300 sec: 43237.1). Total num frames: 1230159872. Throughput: 0: 10437.2. Samples: 57526566. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:33,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:34,979][626795] Updated weights for policy 0, policy_version 150172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:36,468][626772] Signal inference workers to stop experience collection... (850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:36,469][626772] Signal inference workers to resume experience collection... (850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:36,476][626795] InferenceWorker_p0-w0: stopping experience collection (850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:36,481][626795] InferenceWorker_p0-w0: resuming experience collection (850 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:36,643][626795] Updated weights for policy 0, policy_version 150182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:38,482][626795] Updated weights for policy 0, policy_version 150192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:38,975][24592] Fps is (10 sec: 45820.1, 60 sec: 42871.5, 300 sec: 43237.1). Total num frames: 1230389248. Throughput: 0: 10441.2. Samples: 57595800. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:38,977][24592] Avg episode reward: [(0, '4.307')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:40,248][626795] Updated weights for policy 0, policy_version 150202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:41,996][626795] Updated weights for policy 0, policy_version 150212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:43,757][626795] Updated weights for policy 0, policy_version 150222 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:43,980][24592] Fps is (10 sec: 46670.8, 60 sec: 43004.4, 300 sec: 43236.5). Total num frames: 1230626816. Throughput: 0: 11036.3. Samples: 57630870. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:43,981][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:45,546][626795] Updated weights for policy 0, policy_version 150232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:47,150][626795] Updated weights for policy 0, policy_version 150242 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:48,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43008.1, 300 sec: 43585.4). Total num frames: 1230856192. Throughput: 0: 11035.9. Samples: 57700896. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:48,979][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:49,113][626795] Updated weights for policy 0, policy_version 150252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:50,783][626795] Updated weights for policy 0, policy_version 150262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:52,560][626795] Updated weights for policy 0, policy_version 150272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:53,975][24592] Fps is (10 sec: 46717.6, 60 sec: 43144.5, 300 sec: 43598.1). Total num frames: 1231093760. Throughput: 0: 11045.7. Samples: 57770544. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:53,976][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:54,356][626795] Updated weights for policy 0, policy_version 150282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:56,119][626795] Updated weights for policy 0, policy_version 150292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:57,813][626795] Updated weights for policy 0, policy_version 150302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:58,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44782.9, 300 sec: 43598.1). Total num frames: 1231323136. Throughput: 0: 11014.8. Samples: 57804504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:50:58,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:50:59,666][626795] Updated weights for policy 0, policy_version 150312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:01,380][626795] Updated weights for policy 0, policy_version 150322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:03,149][626795] Updated weights for policy 0, policy_version 150332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:05,668][24592] Fps is (10 sec: 37133.4, 60 sec: 43156.0, 300 sec: 43266.6). Total num frames: 1231527936. Throughput: 0: 10625.5. Samples: 57874854. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:05,668][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:05,670][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000150333_1231527936.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:05,755][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000149052_1221033984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:07,322][626795] Updated weights for policy 0, policy_version 150342 (0.2318)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:08,976][24592] Fps is (10 sec: 35222.7, 60 sec: 42870.8, 300 sec: 43237.0). Total num frames: 1231675392. Throughput: 0: 10393.7. Samples: 57916698. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:08,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:09,123][626795] Updated weights for policy 0, policy_version 150352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:10,743][626795] Updated weights for policy 0, policy_version 150362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:12,558][626795] Updated weights for policy 0, policy_version 150372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:13,975][24592] Fps is (10 sec: 45360.0, 60 sec: 42735.0, 300 sec: 43237.2). Total num frames: 1231904768. Throughput: 0: 10907.9. Samples: 57951486. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:13,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:14,343][626795] Updated weights for policy 0, policy_version 150382 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:16,158][626795] Updated weights for policy 0, policy_version 150392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:17,852][626795] Updated weights for policy 0, policy_version 150402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:18,976][24592] Fps is (10 sec: 45875.0, 60 sec: 42734.7, 300 sec: 43209.2). Total num frames: 1232134144. Throughput: 0: 10997.8. Samples: 58021476. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:18,978][24592] Avg episode reward: [(0, '4.237')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:19,730][626795] Updated weights for policy 0, policy_version 150412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:21,415][626795] Updated weights for policy 0, policy_version 150422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:23,179][626795] Updated weights for policy 0, policy_version 150432 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:23,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42734.9, 300 sec: 43587.6). Total num frames: 1232371712. Throughput: 0: 10993.5. Samples: 58090506. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:23,977][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:25,021][626795] Updated weights for policy 0, policy_version 150442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:26,704][626795] Updated weights for policy 0, policy_version 150452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:28,492][626795] Updated weights for policy 0, policy_version 150462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:28,975][24592] Fps is (10 sec: 46698.7, 60 sec: 44452.0, 300 sec: 43598.4). Total num frames: 1232601088. Throughput: 0: 10984.0. Samples: 58125096. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:28,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:30,301][626795] Updated weights for policy 0, policy_version 150472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:32,047][626795] Updated weights for policy 0, policy_version 150482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:33,777][626795] Updated weights for policy 0, policy_version 150492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:33,976][24592] Fps is (10 sec: 45873.3, 60 sec: 44509.5, 300 sec: 43570.3). Total num frames: 1232830464. Throughput: 0: 10972.8. Samples: 58194678. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:33,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:35,544][626795] Updated weights for policy 0, policy_version 150502 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:37,232][626795] Updated weights for policy 0, policy_version 150512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:40,244][24592] Fps is (10 sec: 37075.5, 60 sec: 42919.6, 300 sec: 43245.5). Total num frames: 1233018880. Throughput: 0: 9932.1. Samples: 58230090. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:40,245][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:41,465][626795] Updated weights for policy 0, policy_version 150522 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:43,171][626795] Updated weights for policy 0, policy_version 150532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:43,975][24592] Fps is (10 sec: 36046.0, 60 sec: 42738.5, 300 sec: 43237.1). Total num frames: 1233190912. Throughput: 0: 10398.5. Samples: 58272438. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:43,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:44,988][626795] Updated weights for policy 0, policy_version 150542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:46,724][626795] Updated weights for policy 0, policy_version 150552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:48,488][626795] Updated weights for policy 0, policy_version 150562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:48,976][24592] Fps is (10 sec: 45971.5, 60 sec: 42734.7, 300 sec: 43237.1). Total num frames: 1233420288. Throughput: 0: 10774.7. Samples: 58341486. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:48,977][24592] Avg episode reward: [(0, '4.312')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:50,310][626795] Updated weights for policy 0, policy_version 150572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:52,016][626795] Updated weights for policy 0, policy_version 150582 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:53,772][626795] Updated weights for policy 0, policy_version 150592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:53,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42598.4, 300 sec: 43209.3). Total num frames: 1233649664. Throughput: 0: 10994.5. Samples: 58411440. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:53,979][24592] Avg episode reward: [(0, '4.222')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:55,652][626795] Updated weights for policy 0, policy_version 150602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:57,356][626795] Updated weights for policy 0, policy_version 150612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:58,976][24592] Fps is (10 sec: 46694.3, 60 sec: 42734.7, 300 sec: 43598.0). Total num frames: 1233887232. Throughput: 0: 10984.0. Samples: 58445772. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:51:58,977][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:51:59,134][626795] Updated weights for policy 0, policy_version 150622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:00,846][626795] Updated weights for policy 0, policy_version 150632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:02,635][626795] Updated weights for policy 0, policy_version 150642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:03,976][24592] Fps is (10 sec: 46690.8, 60 sec: 44396.2, 300 sec: 43598.0). Total num frames: 1234116608. Throughput: 0: 10988.0. Samples: 58515936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:03,977][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:04,495][626795] Updated weights for policy 0, policy_version 150652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:06,185][626795] Updated weights for policy 0, policy_version 150662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:07,946][626795] Updated weights for policy 0, policy_version 150672 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:08,975][24592] Fps is (10 sec: 45877.2, 60 sec: 44510.6, 300 sec: 43570.3). Total num frames: 1234345984. Throughput: 0: 10980.8. Samples: 58584642. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:08,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:09,838][626795] Updated weights for policy 0, policy_version 150682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:11,533][626795] Updated weights for policy 0, policy_version 150692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:14,844][24592] Fps is (10 sec: 36183.3, 60 sec: 42798.3, 300 sec: 43221.0). Total num frames: 1234509824. Throughput: 0: 10779.1. Samples: 58619514. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:14,845][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:15,629][626795] Updated weights for policy 0, policy_version 150702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:17,428][626795] Updated weights for policy 0, policy_version 150712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:18,975][24592] Fps is (10 sec: 36044.7, 60 sec: 42872.2, 300 sec: 43237.1). Total num frames: 1234706432. Throughput: 0: 10376.5. Samples: 58661616. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:18,977][24592] Avg episode reward: [(0, '4.303')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:19,150][626795] Updated weights for policy 0, policy_version 150722 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:20,951][626795] Updated weights for policy 0, policy_version 150732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:22,712][626795] Updated weights for policy 0, policy_version 150742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:23,975][24592] Fps is (10 sec: 46647.9, 60 sec: 42734.9, 300 sec: 43209.3). Total num frames: 1234935808. Throughput: 0: 11461.3. Samples: 58731306. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:23,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:24,507][626795] Updated weights for policy 0, policy_version 150752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:26,178][626795] Updated weights for policy 0, policy_version 150762 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:27,894][626795] Updated weights for policy 0, policy_version 150772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:28,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42734.9, 300 sec: 43209.3). Total num frames: 1235165184. Throughput: 0: 10980.2. Samples: 58766544. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:28,977][24592] Avg episode reward: [(0, '4.391')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:29,635][626795] Updated weights for policy 0, policy_version 150782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:31,404][626795] Updated weights for policy 0, policy_version 150792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:33,126][626795] Updated weights for policy 0, policy_version 150802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:33,975][24592] Fps is (10 sec: 47513.5, 60 sec: 43008.3, 300 sec: 43598.1). Total num frames: 1235410944. Throughput: 0: 11032.2. Samples: 58837932. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:33,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:34,840][626795] Updated weights for policy 0, policy_version 150812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:36,577][626795] Updated weights for policy 0, policy_version 150822 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:38,400][626795] Updated weights for policy 0, policy_version 150832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:38,976][24592] Fps is (10 sec: 47511.9, 60 sec: 44634.2, 300 sec: 43598.0). Total num frames: 1235640320. Throughput: 0: 11028.7. Samples: 58907736. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:38,978][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:40,132][626795] Updated weights for policy 0, policy_version 150842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:41,933][626795] Updated weights for policy 0, policy_version 150852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:43,680][626795] Updated weights for policy 0, policy_version 150862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:43,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44646.4, 300 sec: 43570.3). Total num frames: 1235869696. Throughput: 0: 11033.4. Samples: 58942272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:43,976][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:45,450][626795] Updated weights for policy 0, policy_version 150872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:47,294][626795] Updated weights for policy 0, policy_version 150882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:48,874][626795] Updated weights for policy 0, policy_version 150892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:48,975][24592] Fps is (10 sec: 46696.2, 60 sec: 44783.2, 300 sec: 43598.2). Total num frames: 1236107264. Throughput: 0: 11029.4. Samples: 59012250. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:48,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:50,796][626795] Updated weights for policy 0, policy_version 150902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:52,543][626795] Updated weights for policy 0, policy_version 150912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:53,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44782.9, 300 sec: 43598.1). Total num frames: 1236336640. Throughput: 0: 11017.6. Samples: 59080434. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:53,977][24592] Avg episode reward: [(0, '4.399')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:54,497][626795] Updated weights for policy 0, policy_version 150922 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:56,165][626795] Updated weights for policy 0, policy_version 150932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:52:58,056][626795] Updated weights for policy 0, policy_version 150942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:58,975][24592] Fps is (10 sec: 44236.8, 60 sec: 44373.6, 300 sec: 43542.6). Total num frames: 1236549632. Throughput: 0: 11210.3. Samples: 59114244. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:52:58,976][24592] Avg episode reward: [(0, '4.257')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:00,111][626795] Updated weights for policy 0, policy_version 150952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:01,826][626795] Updated weights for policy 0, policy_version 150962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:05,443][24592] Fps is (10 sec: 31430.6, 60 sec: 41981.4, 300 sec: 43023.0). Total num frames: 1236697088. Throughput: 0: 10438.9. Samples: 59146692. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:05,445][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:05,477][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000150965_1236705280.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:05,550][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000149694_1226293248.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:06,777][626795] Updated weights for policy 0, policy_version 150972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:08,581][626795] Updated weights for policy 0, policy_version 150982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:08,975][24592] Fps is (10 sec: 31129.5, 60 sec: 41915.7, 300 sec: 43375.9). Total num frames: 1236860928. Throughput: 0: 10700.1. Samples: 59212812. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:08,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:10,374][626795] Updated weights for policy 0, policy_version 150992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:12,049][626795] Updated weights for policy 0, policy_version 151002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:13,889][626795] Updated weights for policy 0, policy_version 151012 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:13,975][24592] Fps is (10 sec: 46087.6, 60 sec: 43639.4, 300 sec: 43348.2). Total num frames: 1237090304. Throughput: 0: 10688.5. Samples: 59247528. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:13,977][24592] Avg episode reward: [(0, '4.356')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:15,472][626795] Updated weights for policy 0, policy_version 151022 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:17,224][626795] Updated weights for policy 0, policy_version 151032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:18,952][626795] Updated weights for policy 0, policy_version 151042 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:18,975][24592] Fps is (10 sec: 47513.8, 60 sec: 43827.2, 300 sec: 43403.7). Total num frames: 1237336064. Throughput: 0: 10679.4. Samples: 59318502. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:18,976][24592] Avg episode reward: [(0, '4.301')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:20,617][626795] Updated weights for policy 0, policy_version 151052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:22,372][626795] Updated weights for policy 0, policy_version 151062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:23,975][24592] Fps is (10 sec: 47513.7, 60 sec: 43827.3, 300 sec: 43403.7). Total num frames: 1237565440. Throughput: 0: 10714.0. Samples: 59389860. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:23,977][24592] Avg episode reward: [(0, '4.165')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:24,236][626795] Updated weights for policy 0, policy_version 151072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:25,937][626795] Updated weights for policy 0, policy_version 151082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:27,674][626795] Updated weights for policy 0, policy_version 151092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:28,975][24592] Fps is (10 sec: 46694.1, 60 sec: 43963.7, 300 sec: 43403.7). Total num frames: 1237803008. Throughput: 0: 10722.0. Samples: 59424762. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:28,976][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:29,396][626795] Updated weights for policy 0, policy_version 151102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:31,155][626795] Updated weights for policy 0, policy_version 151112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:32,899][626795] Updated weights for policy 0, policy_version 151122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:33,982][24592] Fps is (10 sec: 46661.2, 60 sec: 43685.5, 300 sec: 43347.1). Total num frames: 1238032384. Throughput: 0: 10738.8. Samples: 59495574. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:33,985][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:34,749][626795] Updated weights for policy 0, policy_version 151132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:36,440][626795] Updated weights for policy 0, policy_version 151142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:40,403][24592] Fps is (10 sec: 35125.0, 60 sec: 41741.7, 300 sec: 42918.3). Total num frames: 1238204416. Throughput: 0: 9680.4. Samples: 59529876. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:40,404][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:41,003][626795] Updated weights for policy 0, policy_version 151152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:42,680][626795] Updated weights for policy 0, policy_version 151162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:43,976][24592] Fps is (10 sec: 34430.2, 60 sec: 41779.1, 300 sec: 43348.2). Total num frames: 1238376448. Throughput: 0: 10080.1. Samples: 59567850. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:43,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:44,589][626795] Updated weights for policy 0, policy_version 151172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:46,336][626795] Updated weights for policy 0, policy_version 151182 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:48,151][626795] Updated weights for policy 0, policy_version 151192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:48,976][24592] Fps is (10 sec: 45869.6, 60 sec: 41505.8, 300 sec: 43292.6). Total num frames: 1238597632. Throughput: 0: 11252.2. Samples: 59636526. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:48,979][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:49,798][626795] Updated weights for policy 0, policy_version 151202 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:51,734][626795] Updated weights for policy 0, policy_version 151212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:53,433][626795] Updated weights for policy 0, policy_version 151222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:53,975][24592] Fps is (10 sec: 45876.1, 60 sec: 41642.7, 300 sec: 43320.4). Total num frames: 1238835200. Throughput: 0: 10956.4. Samples: 59705850. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:53,976][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:55,181][626795] Updated weights for policy 0, policy_version 151232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:56,984][626795] Updated weights for policy 0, policy_version 151242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:53:58,597][626795] Updated weights for policy 0, policy_version 151252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:58,976][24592] Fps is (10 sec: 46695.0, 60 sec: 41915.5, 300 sec: 43292.6). Total num frames: 1239064576. Throughput: 0: 10966.4. Samples: 59741022. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:53:58,977][24592] Avg episode reward: [(0, '4.361')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:00,474][626795] Updated weights for policy 0, policy_version 151262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:02,259][626795] Updated weights for policy 0, policy_version 151272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:03,959][626795] Updated weights for policy 0, policy_version 151282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:03,978][24592] Fps is (10 sec: 46682.7, 60 sec: 44504.7, 300 sec: 43320.1). Total num frames: 1239302144. Throughput: 0: 10936.7. Samples: 59810682. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:03,979][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:05,760][626795] Updated weights for policy 0, policy_version 151292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:07,524][626795] Updated weights for policy 0, policy_version 151302 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:08,976][24592] Fps is (10 sec: 46695.2, 60 sec: 44509.7, 300 sec: 43292.7). Total num frames: 1239531520. Throughput: 0: 10888.2. Samples: 59879832. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:08,977][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:09,254][626795] Updated weights for policy 0, policy_version 151312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:10,984][626795] Updated weights for policy 0, policy_version 151322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:14,881][24592] Fps is (10 sec: 36815.6, 60 sec: 42906.5, 300 sec: 42966.3). Total num frames: 1239703552. Throughput: 0: 10684.4. Samples: 59915238. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:14,882][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:15,060][626795] Updated weights for policy 0, policy_version 151332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:16,775][626795] Updated weights for policy 0, policy_version 151342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:18,505][626795] Updated weights for policy 0, policy_version 151352 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:18,979][24592] Fps is (10 sec: 36034.1, 60 sec: 42596.1, 300 sec: 43292.2). Total num frames: 1239891968. Throughput: 0: 10300.1. Samples: 59959038. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:18,980][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:20,286][626795] Updated weights for policy 0, policy_version 151362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:22,082][626795] Updated weights for policy 0, policy_version 151372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:23,852][626795] Updated weights for policy 0, policy_version 151382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:23,976][24592] Fps is (10 sec: 46839.6, 60 sec: 42734.8, 300 sec: 43320.4). Total num frames: 1240129536. Throughput: 0: 11466.2. Samples: 60029484. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:23,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:25,575][626795] Updated weights for policy 0, policy_version 151392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:27,250][626795] Updated weights for policy 0, policy_version 151402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:28,976][24592] Fps is (10 sec: 45888.2, 60 sec: 42461.6, 300 sec: 43264.8). Total num frames: 1240350720. Throughput: 0: 11025.8. Samples: 60064014. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:28,978][24592] Avg episode reward: [(0, '4.285')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:29,132][626795] Updated weights for policy 0, policy_version 151412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:30,827][626795] Updated weights for policy 0, policy_version 151422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:32,632][626795] Updated weights for policy 0, policy_version 151432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:33,975][24592] Fps is (10 sec: 45876.4, 60 sec: 42603.5, 300 sec: 43292.6). Total num frames: 1240588288. Throughput: 0: 11052.4. Samples: 60133878. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:33,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:34,364][626795] Updated weights for policy 0, policy_version 151442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:36,123][626795] Updated weights for policy 0, policy_version 151452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:37,918][626795] Updated weights for policy 0, policy_version 151462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:38,975][24592] Fps is (10 sec: 47515.2, 60 sec: 44755.9, 300 sec: 43320.4). Total num frames: 1240825856. Throughput: 0: 11061.3. Samples: 60203610. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:38,976][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:39,699][626795] Updated weights for policy 0, policy_version 151472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:41,432][626795] Updated weights for policy 0, policy_version 151482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:43,185][626795] Updated weights for policy 0, policy_version 151492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:43,976][24592] Fps is (10 sec: 46693.5, 60 sec: 44646.4, 300 sec: 43320.4). Total num frames: 1241055232. Throughput: 0: 11052.7. Samples: 60238392. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:43,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:44,966][626795] Updated weights for policy 0, policy_version 151502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:46,669][626795] Updated weights for policy 0, policy_version 151512 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:49,446][24592] Fps is (10 sec: 35990.6, 60 sec: 43080.3, 300 sec: 42974.2). Total num frames: 1241202688. Throughput: 0: 10173.8. Samples: 60273264. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:49,446][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:50,832][626795] Updated weights for policy 0, policy_version 151522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:52,652][626795] Updated weights for policy 0, policy_version 151532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:53,975][24592] Fps is (10 sec: 36045.1, 60 sec: 43007.9, 300 sec: 43320.4). Total num frames: 1241415680. Throughput: 0: 10470.7. Samples: 60351012. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:53,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:54,372][626795] Updated weights for policy 0, policy_version 151542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:56,018][626795] Updated weights for policy 0, policy_version 151552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:57,816][626795] Updated weights for policy 0, policy_version 151562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:58,976][24592] Fps is (10 sec: 46417.0, 60 sec: 43007.8, 300 sec: 43320.3). Total num frames: 1241645056. Throughput: 0: 10666.5. Samples: 60385578. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:54:58,978][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:54:59,784][626795] Updated weights for policy 0, policy_version 151572 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:01,392][626795] Updated weights for policy 0, policy_version 151582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:03,174][626795] Updated weights for policy 0, policy_version 151592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:03,976][24592] Fps is (10 sec: 45874.5, 60 sec: 42873.1, 300 sec: 43292.6). Total num frames: 1241874432. Throughput: 0: 11022.3. Samples: 60455010. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:03,976][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000151596_1241874432.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000150333_1231527936.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:04,911][626795] Updated weights for policy 0, policy_version 151602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:06,756][626795] Updated weights for policy 0, policy_version 151612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:08,506][626795] Updated weights for policy 0, policy_version 151622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:08,975][24592] Fps is (10 sec: 45878.0, 60 sec: 42871.6, 300 sec: 43264.9). Total num frames: 1242103808. Throughput: 0: 10999.6. Samples: 60524466. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:08,976][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:10,287][626795] Updated weights for policy 0, policy_version 151632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:11,936][626795] Updated weights for policy 0, policy_version 151642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:13,653][626795] Updated weights for policy 0, policy_version 151652 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:13,976][24592] Fps is (10 sec: 46694.9, 60 sec: 44637.4, 300 sec: 43292.7). Total num frames: 1242341376. Throughput: 0: 11017.7. Samples: 60559806. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:13,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:15,476][626795] Updated weights for policy 0, policy_version 151662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:17,176][626795] Updated weights for policy 0, policy_version 151672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:18,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44648.8, 300 sec: 43264.9). Total num frames: 1242570752. Throughput: 0: 11012.5. Samples: 60629442. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:18,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:19,002][626795] Updated weights for policy 0, policy_version 151682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:20,797][626795] Updated weights for policy 0, policy_version 151692 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:24,020][24592] Fps is (10 sec: 35069.4, 60 sec: 42703.3, 300 sec: 43237.8). Total num frames: 1242693632. Throughput: 0: 10221.7. Samples: 60664044. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:24,022][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:24,888][626795] Updated weights for policy 0, policy_version 151702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:26,647][626795] Updated weights for policy 0, policy_version 151712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:28,451][626795] Updated weights for policy 0, policy_version 151722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:28,975][24592] Fps is (10 sec: 36044.7, 60 sec: 43008.2, 300 sec: 43292.6). Total num frames: 1242931200. Throughput: 0: 10410.7. Samples: 60706872. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:28,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:30,143][626795] Updated weights for policy 0, policy_version 151732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:31,920][626795] Updated weights for policy 0, policy_version 151742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:33,725][626795] Updated weights for policy 0, policy_version 151752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:33,976][24592] Fps is (10 sec: 46901.3, 60 sec: 42871.0, 300 sec: 43292.6). Total num frames: 1243160576. Throughput: 0: 11297.8. Samples: 60776358. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:33,982][24592] Avg episode reward: [(0, '4.318')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:35,447][626795] Updated weights for policy 0, policy_version 151762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:37,212][626795] Updated weights for policy 0, policy_version 151772 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:38,943][626795] Updated weights for policy 0, policy_version 151782 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:38,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42871.5, 300 sec: 43293.4). Total num frames: 1243398144. Throughput: 0: 11005.5. Samples: 60846258. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:38,976][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:40,777][626795] Updated weights for policy 0, policy_version 151792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:42,409][626795] Updated weights for policy 0, policy_version 151802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:43,975][24592] Fps is (10 sec: 46696.7, 60 sec: 42871.5, 300 sec: 43292.6). Total num frames: 1243627520. Throughput: 0: 11023.3. Samples: 60881622. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:43,976][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:44,181][626795] Updated weights for policy 0, policy_version 151812 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:46,000][626795] Updated weights for policy 0, policy_version 151822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:47,866][626795] Updated weights for policy 0, policy_version 151832 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:48,975][24592] Fps is (10 sec: 45875.1, 60 sec: 44586.3, 300 sec: 43264.9). Total num frames: 1243856896. Throughput: 0: 11019.3. Samples: 60950874. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:48,978][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:49,526][626795] Updated weights for policy 0, policy_version 151842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:51,342][626795] Updated weights for policy 0, policy_version 151852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:53,126][626795] Updated weights for policy 0, policy_version 151862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:53,976][24592] Fps is (10 sec: 45874.8, 60 sec: 44509.8, 300 sec: 43264.9). Total num frames: 1244086272. Throughput: 0: 11003.0. Samples: 61019604. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:53,977][24592] Avg episode reward: [(0, '4.291')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:54,979][626795] Updated weights for policy 0, policy_version 151872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:58,976][24592] Fps is (10 sec: 35224.0, 60 sec: 42735.1, 300 sec: 43235.1). Total num frames: 1244209152. Throughput: 0: 10917.7. Samples: 61051104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:55:58,977][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:55:59,044][626795] Updated weights for policy 0, policy_version 151882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:00,804][626795] Updated weights for policy 0, policy_version 151892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:02,546][626795] Updated weights for policy 0, policy_version 151902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:03,975][24592] Fps is (10 sec: 36045.5, 60 sec: 42871.7, 300 sec: 43292.8). Total num frames: 1244446720. Throughput: 0: 10396.1. Samples: 61097268. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:03,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:04,277][626795] Updated weights for policy 0, policy_version 151912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:06,022][626795] Updated weights for policy 0, policy_version 151922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:07,884][626795] Updated weights for policy 0, policy_version 151932 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,617][626772] Signal inference workers to stop experience collection... (900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,617][626772] Signal inference workers to resume experience collection... (900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,624][626795] InferenceWorker_p0-w0: stopping experience collection (900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,630][626795] InferenceWorker_p0-w0: resuming experience collection (900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,975][24592] Fps is (10 sec: 46696.6, 60 sec: 42871.5, 300 sec: 43292.6). Total num frames: 1244676096. Throughput: 0: 11181.1. Samples: 61166694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:08,976][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:09,707][626795] Updated weights for policy 0, policy_version 151942 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:11,392][626795] Updated weights for policy 0, policy_version 151952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:13,177][626795] Updated weights for policy 0, policy_version 151962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42735.1, 300 sec: 43292.8). Total num frames: 1244905472. Throughput: 0: 10980.7. Samples: 61201002. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:13,976][24592] Avg episode reward: [(0, '4.414')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:14,948][626795] Updated weights for policy 0, policy_version 151972 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:16,678][626795] Updated weights for policy 0, policy_version 151982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:18,512][626795] Updated weights for policy 0, policy_version 151992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42735.0, 300 sec: 43264.9). Total num frames: 1245134848. Throughput: 0: 10992.3. Samples: 61271004. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:18,976][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:20,325][626795] Updated weights for policy 0, policy_version 152002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:21,912][626795] Updated weights for policy 0, policy_version 152012 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:23,656][626795] Updated weights for policy 0, policy_version 152022 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:23,975][24592] Fps is (10 sec: 46694.1, 60 sec: 44679.6, 300 sec: 43292.6). Total num frames: 1245372416. Throughput: 0: 10992.9. Samples: 61340940. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:23,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:25,576][626795] Updated weights for policy 0, policy_version 152032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:27,240][626795] Updated weights for policy 0, policy_version 152042 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44509.9, 300 sec: 43292.7). Total num frames: 1245601792. Throughput: 0: 10989.1. Samples: 61376130. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:28,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:28,993][626795] Updated weights for policy 0, policy_version 152052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:30,719][626795] Updated weights for policy 0, policy_version 152062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:33,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42735.3, 300 sec: 43256.5). Total num frames: 1245724672. Throughput: 0: 10548.8. Samples: 61425570. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:33,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:34,808][626795] Updated weights for policy 0, policy_version 152072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:36,652][626795] Updated weights for policy 0, policy_version 152082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:38,562][626795] Updated weights for policy 0, policy_version 152092 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:38,983][24592] Fps is (10 sec: 35199.9, 60 sec: 42593.2, 300 sec: 43263.8). Total num frames: 1245954048. Throughput: 0: 10383.0. Samples: 61486914. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:38,983][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:40,200][626795] Updated weights for policy 0, policy_version 152102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:41,964][626795] Updated weights for policy 0, policy_version 152112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:43,769][626795] Updated weights for policy 0, policy_version 152122 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:43,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42735.0, 300 sec: 43292.7). Total num frames: 1246191616. Throughput: 0: 10458.6. Samples: 61521738. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:43,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:45,510][626795] Updated weights for policy 0, policy_version 152132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:47,228][626795] Updated weights for policy 0, policy_version 152142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:48,923][626795] Updated weights for policy 0, policy_version 152152 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:48,976][24592] Fps is (10 sec: 47547.5, 60 sec: 42871.3, 300 sec: 43320.4). Total num frames: 1246429184. Throughput: 0: 10996.2. Samples: 61592100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:48,977][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:50,692][626795] Updated weights for policy 0, policy_version 152162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:52,439][626795] Updated weights for policy 0, policy_version 152172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:53,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42871.6, 300 sec: 43292.7). Total num frames: 1246658560. Throughput: 0: 11028.1. Samples: 61662960. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:53,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:54,268][626795] Updated weights for policy 0, policy_version 152182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:55,947][626795] Updated weights for policy 0, policy_version 152192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:57,692][626795] Updated weights for policy 0, policy_version 152202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:58,976][24592] Fps is (10 sec: 46693.1, 60 sec: 44782.9, 300 sec: 43320.5). Total num frames: 1246896128. Throughput: 0: 11046.8. Samples: 61698114. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:56:58,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:56:59,458][626795] Updated weights for policy 0, policy_version 152212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:01,241][626795] Updated weights for policy 0, policy_version 152222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:02,963][626795] Updated weights for policy 0, policy_version 152232 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:03,975][24592] Fps is (10 sec: 46693.9, 60 sec: 44646.3, 300 sec: 43320.4). Total num frames: 1247125504. Throughput: 0: 11041.8. Samples: 61767888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:03,977][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000152237_1247125504.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:04,073][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000150965_1236705280.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:04,827][626795] Updated weights for policy 0, policy_version 152242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:08,975][24592] Fps is (10 sec: 34407.7, 60 sec: 42734.9, 300 sec: 43281.2). Total num frames: 1247240192. Throughput: 0: 10385.5. Samples: 61808286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:08,976][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:09,065][626795] Updated weights for policy 0, policy_version 152252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:10,832][626795] Updated weights for policy 0, policy_version 152262 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:12,606][626795] Updated weights for policy 0, policy_version 152272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:13,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42734.9, 300 sec: 43264.9). Total num frames: 1247469568. Throughput: 0: 10366.4. Samples: 61842618. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:13,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:14,398][626795] Updated weights for policy 0, policy_version 152282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:16,114][626795] Updated weights for policy 0, policy_version 152292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:17,866][626795] Updated weights for policy 0, policy_version 152302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:18,976][24592] Fps is (10 sec: 46692.8, 60 sec: 42871.2, 300 sec: 43292.6). Total num frames: 1247707136. Throughput: 0: 10814.8. Samples: 61912242. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:18,978][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:19,618][626795] Updated weights for policy 0, policy_version 152312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:21,202][626795] Updated weights for policy 0, policy_version 152322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:22,993][626795] Updated weights for policy 0, policy_version 152332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 43008.0, 300 sec: 43348.2). Total num frames: 1247952896. Throughput: 0: 11074.2. Samples: 61985172. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:23,977][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:24,714][626795] Updated weights for policy 0, policy_version 152342 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:26,482][626795] Updated weights for policy 0, policy_version 152352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:28,244][626795] Updated weights for policy 0, policy_version 152362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:28,975][24592] Fps is (10 sec: 47515.5, 60 sec: 43008.0, 300 sec: 43292.6). Total num frames: 1248182272. Throughput: 0: 11059.6. Samples: 62019420. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:28,976][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:29,973][626795] Updated weights for policy 0, policy_version 152372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:31,718][626795] Updated weights for policy 0, policy_version 152382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:33,611][626795] Updated weights for policy 0, policy_version 152392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:33,975][24592] Fps is (10 sec: 45874.6, 60 sec: 44782.9, 300 sec: 43292.7). Total num frames: 1248411648. Throughput: 0: 11053.9. Samples: 62089524. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:33,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:35,352][626795] Updated weights for policy 0, policy_version 152402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:37,058][626795] Updated weights for policy 0, policy_version 152412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:38,802][626795] Updated weights for policy 0, policy_version 152422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:38,976][24592] Fps is (10 sec: 45873.6, 60 sec: 44788.1, 300 sec: 43292.6). Total num frames: 1248641024. Throughput: 0: 11017.6. Samples: 62158758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:38,980][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:43,099][626795] Updated weights for policy 0, policy_version 152432 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:43,975][24592] Fps is (10 sec: 35226.1, 60 sec: 42871.5, 300 sec: 42903.9). Total num frames: 1248763904. Throughput: 0: 10641.2. Samples: 62176962. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:43,976][24592] Avg episode reward: [(0, '4.379')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:44,892][626795] Updated weights for policy 0, policy_version 152442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:46,698][626795] Updated weights for policy 0, policy_version 152452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:48,414][626795] Updated weights for policy 0, policy_version 152462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:48,976][24592] Fps is (10 sec: 35226.4, 60 sec: 42735.0, 300 sec: 42903.8). Total num frames: 1248993280. Throughput: 0: 10352.7. Samples: 62233758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:48,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:50,187][626795] Updated weights for policy 0, policy_version 152472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:52,102][626795] Updated weights for policy 0, policy_version 152482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:53,733][626795] Updated weights for policy 0, policy_version 152492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:53,976][24592] Fps is (10 sec: 45871.3, 60 sec: 42734.3, 300 sec: 42959.3). Total num frames: 1249222656. Throughput: 0: 10993.8. Samples: 62303016. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:53,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:55,521][626795] Updated weights for policy 0, policy_version 152502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:57,105][626795] Updated weights for policy 0, policy_version 152512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:57:58,779][626795] Updated weights for policy 0, policy_version 152522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:58,975][24592] Fps is (10 sec: 47514.4, 60 sec: 42871.8, 300 sec: 43509.2). Total num frames: 1249468416. Throughput: 0: 11042.5. Samples: 62339532. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:57:58,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:00,493][626795] Updated weights for policy 0, policy_version 152532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:02,174][626795] Updated weights for policy 0, policy_version 152542 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:03,720][626795] Updated weights for policy 0, policy_version 152552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:03,975][24592] Fps is (10 sec: 48337.0, 60 sec: 43008.1, 300 sec: 43542.6). Total num frames: 1249705984. Throughput: 0: 11127.4. Samples: 62412972. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:03,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:05,584][626795] Updated weights for policy 0, policy_version 152562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:07,229][626795] Updated weights for policy 0, policy_version 152572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45192.6, 300 sec: 43598.1). Total num frames: 1249951744. Throughput: 0: 11089.2. Samples: 62484186. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:08,976][626795] Updated weights for policy 0, policy_version 152582 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:08,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:10,751][626795] Updated weights for policy 0, policy_version 152592 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:12,661][626795] Updated weights for policy 0, policy_version 152602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:13,975][24592] Fps is (10 sec: 46694.2, 60 sec: 45056.0, 300 sec: 43514.8). Total num frames: 1250172928. Throughput: 0: 11099.9. Samples: 62518914. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:13,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:14,338][626795] Updated weights for policy 0, policy_version 152612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:18,949][626795] Updated weights for policy 0, policy_version 152622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:18,975][24592] Fps is (10 sec: 32767.6, 60 sec: 42871.7, 300 sec: 43098.2). Total num frames: 1250279424. Throughput: 0: 10384.1. Samples: 62556810. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:18,978][24592] Avg episode reward: [(0, '4.264')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:20,767][626795] Updated weights for policy 0, policy_version 152632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:22,681][626795] Updated weights for policy 0, policy_version 152642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:23,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42325.3, 300 sec: 43015.0). Total num frames: 1250492416. Throughput: 0: 10305.8. Samples: 62622516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:23,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:24,558][626795] Updated weights for policy 0, policy_version 152652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:26,306][626795] Updated weights for policy 0, policy_version 152662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:28,036][626795] Updated weights for policy 0, policy_version 152672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:28,975][24592] Fps is (10 sec: 45056.7, 60 sec: 42461.9, 300 sec: 43043.8). Total num frames: 1250729984. Throughput: 0: 10631.2. Samples: 62655366. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:28,976][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:29,832][626795] Updated weights for policy 0, policy_version 152682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:31,443][626795] Updated weights for policy 0, policy_version 152692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:33,194][626795] Updated weights for policy 0, policy_version 152702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:33,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42735.0, 300 sec: 43503.2). Total num frames: 1250975744. Throughput: 0: 10985.4. Samples: 62728098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:33,977][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:34,875][626795] Updated weights for policy 0, policy_version 152712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:36,642][626795] Updated weights for policy 0, policy_version 152722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:38,254][626795] Updated weights for policy 0, policy_version 152732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:38,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42871.8, 300 sec: 43514.8). Total num frames: 1251213312. Throughput: 0: 11050.1. Samples: 62800260. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:38,977][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:40,035][626795] Updated weights for policy 0, policy_version 152742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:41,724][626795] Updated weights for policy 0, policy_version 152752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:43,422][626795] Updated weights for policy 0, policy_version 152762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:43,976][24592] Fps is (10 sec: 47511.5, 60 sec: 44782.6, 300 sec: 43570.4). Total num frames: 1251450880. Throughput: 0: 11030.0. Samples: 62835888. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:43,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:45,142][626795] Updated weights for policy 0, policy_version 152772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:46,808][626795] Updated weights for policy 0, policy_version 152782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:48,532][626795] Updated weights for policy 0, policy_version 152792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:48,975][24592] Fps is (10 sec: 47513.2, 60 sec: 44919.5, 300 sec: 43570.3). Total num frames: 1251688448. Throughput: 0: 10999.3. Samples: 62907942. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:48,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:52,948][626795] Updated weights for policy 0, policy_version 152802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:53,975][24592] Fps is (10 sec: 34407.9, 60 sec: 42872.1, 300 sec: 43153.9). Total num frames: 1251794944. Throughput: 0: 10281.7. Samples: 62946864. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:53,976][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:54,809][626795] Updated weights for policy 0, policy_version 152812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:56,572][626795] Updated weights for policy 0, policy_version 152822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:58:58,505][626795] Updated weights for policy 0, policy_version 152832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:58,976][24592] Fps is (10 sec: 32767.5, 60 sec: 42461.7, 300 sec: 43098.6). Total num frames: 1252016128. Throughput: 0: 10246.4. Samples: 62980002. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:58:58,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:00,349][626795] Updated weights for policy 0, policy_version 152842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:02,088][626795] Updated weights for policy 0, policy_version 152852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:03,741][626795] Updated weights for policy 0, policy_version 152862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:03,975][24592] Fps is (10 sec: 45874.8, 60 sec: 42461.8, 300 sec: 43126.0). Total num frames: 1252253696. Throughput: 0: 10928.8. Samples: 63048606. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:03,976][24592] Avg episode reward: [(0, '4.345')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000152863_1252253696.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:04,088][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000151596_1241874432.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:05,584][626795] Updated weights for policy 0, policy_version 152872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:07,159][626795] Updated weights for policy 0, policy_version 152882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:08,953][626795] Updated weights for policy 0, policy_version 152892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:08,976][24592] Fps is (10 sec: 47513.2, 60 sec: 42325.1, 300 sec: 43481.6). Total num frames: 1252491264. Throughput: 0: 11060.6. Samples: 63120246. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:08,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:10,603][626795] Updated weights for policy 0, policy_version 152902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:12,280][626795] Updated weights for policy 0, policy_version 152912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:13,921][626795] Updated weights for policy 0, policy_version 152922 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:13,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42734.9, 300 sec: 43543.0). Total num frames: 1252737024. Throughput: 0: 11140.5. Samples: 63156690. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:13,976][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:15,697][626795] Updated weights for policy 0, policy_version 152932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:17,413][626795] Updated weights for policy 0, policy_version 152942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:18,975][24592] Fps is (10 sec: 48334.0, 60 sec: 44919.5, 300 sec: 43542.6). Total num frames: 1252974592. Throughput: 0: 11134.1. Samples: 63229134. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:18,976][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:19,185][626795] Updated weights for policy 0, policy_version 152952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:20,821][626795] Updated weights for policy 0, policy_version 152962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:22,579][626795] Updated weights for policy 0, policy_version 152972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:23,977][24592] Fps is (10 sec: 46686.2, 60 sec: 45191.2, 300 sec: 43570.1). Total num frames: 1253203968. Throughput: 0: 11100.2. Samples: 63299790. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:23,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:24,345][626795] Updated weights for policy 0, policy_version 152982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:28,888][626795] Updated weights for policy 0, policy_version 152992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:28,976][24592] Fps is (10 sec: 33586.6, 60 sec: 43007.8, 300 sec: 43126.0). Total num frames: 1253310464. Throughput: 0: 10628.4. Samples: 63314166. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:28,978][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:30,667][626795] Updated weights for policy 0, policy_version 153002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:32,515][626795] Updated weights for policy 0, policy_version 153012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:33,976][24592] Fps is (10 sec: 33592.3, 60 sec: 42734.7, 300 sec: 43098.2). Total num frames: 1253539840. Throughput: 0: 10284.5. Samples: 63370746. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:33,977][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:34,340][626795] Updated weights for policy 0, policy_version 153022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:36,122][626795] Updated weights for policy 0, policy_version 153032 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:37,844][626795] Updated weights for policy 0, policy_version 153042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:38,975][24592] Fps is (10 sec: 45875.7, 60 sec: 42598.3, 300 sec: 43098.3). Total num frames: 1253769216. Throughput: 0: 10967.2. Samples: 63440388. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:38,978][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:39,624][626795] Updated weights for policy 0, policy_version 153052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:41,227][626795] Updated weights for policy 0, policy_version 153062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:42,997][626795] Updated weights for policy 0, policy_version 153072 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:43,977][24592] Fps is (10 sec: 46685.9, 60 sec: 42597.2, 300 sec: 43472.7). Total num frames: 1254006784. Throughput: 0: 11028.6. Samples: 63476310. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:43,978][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:44,669][626795] Updated weights for policy 0, policy_version 153082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:46,401][626795] Updated weights for policy 0, policy_version 153092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:48,006][626795] Updated weights for policy 0, policy_version 153102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:48,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42734.9, 300 sec: 43514.8). Total num frames: 1254252544. Throughput: 0: 11116.1. Samples: 63548832. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:48,976][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:49,798][626795] Updated weights for policy 0, policy_version 153112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:51,440][626795] Updated weights for policy 0, policy_version 153122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:53,262][626795] Updated weights for policy 0, policy_version 153132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:53,976][24592] Fps is (10 sec: 48341.6, 60 sec: 44919.3, 300 sec: 43542.6). Total num frames: 1254490112. Throughput: 0: 11099.5. Samples: 63619722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:53,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:54,987][626795] Updated weights for policy 0, policy_version 153142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:56,750][626795] Updated weights for policy 0, policy_version 153152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 09:59:58,453][626795] Updated weights for policy 0, policy_version 153162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:58,975][24592] Fps is (10 sec: 46694.7, 60 sec: 45056.2, 300 sec: 43542.6). Total num frames: 1254719488. Throughput: 0: 11069.9. Samples: 63654834. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 09:59:58,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:03,117][626795] Updated weights for policy 0, policy_version 153172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:03,975][24592] Fps is (10 sec: 33587.9, 60 sec: 42871.5, 300 sec: 43126.0). Total num frames: 1254825984. Throughput: 0: 10311.9. Samples: 63693168. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:03,976][24592] Avg episode reward: [(0, '4.372')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:04,854][626795] Updated weights for policy 0, policy_version 153182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:06,641][626795] Updated weights for policy 0, policy_version 153192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:08,374][626795] Updated weights for policy 0, policy_version 153202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:08,975][24592] Fps is (10 sec: 32767.7, 60 sec: 42598.5, 300 sec: 43070.5). Total num frames: 1255047168. Throughput: 0: 10243.7. Samples: 63760740. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:08,976][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:10,298][626795] Updated weights for policy 0, policy_version 153212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:12,001][626795] Updated weights for policy 0, policy_version 153222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:13,688][626795] Updated weights for policy 0, policy_version 153232 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42461.9, 300 sec: 43098.3). Total num frames: 1255284736. Throughput: 0: 10692.0. Samples: 63795306. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:13,976][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:15,402][626795] Updated weights for policy 0, policy_version 153242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:17,184][626795] Updated weights for policy 0, policy_version 153252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:18,745][626795] Updated weights for policy 0, policy_version 153262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:18,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42598.4, 300 sec: 43521.4). Total num frames: 1255530496. Throughput: 0: 11028.7. Samples: 63867036. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:18,976][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:20,515][626795] Updated weights for policy 0, policy_version 153272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:22,212][626795] Updated weights for policy 0, policy_version 153282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:23,879][626795] Updated weights for policy 0, policy_version 153292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:23,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42736.2, 300 sec: 43514.8). Total num frames: 1255768064. Throughput: 0: 11104.2. Samples: 63940074. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:23,976][24592] Avg episode reward: [(0, '4.343')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:25,532][626795] Updated weights for policy 0, policy_version 153302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:27,355][626795] Updated weights for policy 0, policy_version 153312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:28,921][626795] Updated weights for policy 0, policy_version 153322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:28,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45056.2, 300 sec: 43570.4). Total num frames: 1256013824. Throughput: 0: 11101.7. Samples: 63975864. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:28,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:30,810][626795] Updated weights for policy 0, policy_version 153332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:32,566][626795] Updated weights for policy 0, policy_version 153342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:33,976][24592] Fps is (10 sec: 47511.1, 60 sec: 45055.8, 300 sec: 43542.5). Total num frames: 1256243200. Throughput: 0: 11060.8. Samples: 64046574. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:33,977][24592] Avg episode reward: [(0, '4.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:34,321][626795] Updated weights for policy 0, policy_version 153352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,046][626772] Signal inference workers to stop experience collection... (950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,048][626772] Signal inference workers to resume experience collection... (950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,063][626795] InferenceWorker_p0-w0: stopping experience collection (950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,068][626795] InferenceWorker_p0-w0: resuming experience collection (950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:38,783][626795] Updated weights for policy 0, policy_version 153362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42871.5, 300 sec: 43098.3). Total num frames: 1256341504. Throughput: 0: 10303.9. Samples: 64083396. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:38,978][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:40,759][626795] Updated weights for policy 0, policy_version 153372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:42,561][626795] Updated weights for policy 0, policy_version 153382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:43,976][24592] Fps is (10 sec: 31948.5, 60 sec: 42599.4, 300 sec: 43070.4). Total num frames: 1256562688. Throughput: 0: 10260.0. Samples: 64116540. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:43,977][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:44,449][626795] Updated weights for policy 0, policy_version 153392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:46,230][626795] Updated weights for policy 0, policy_version 153402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:47,938][626795] Updated weights for policy 0, policy_version 153412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:48,975][24592] Fps is (10 sec: 45874.6, 60 sec: 42461.8, 300 sec: 43098.3). Total num frames: 1256800256. Throughput: 0: 10932.5. Samples: 64185132. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:48,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:49,683][626795] Updated weights for policy 0, policy_version 153422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:51,386][626795] Updated weights for policy 0, policy_version 153432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:53,102][626795] Updated weights for policy 0, policy_version 153442 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:53,976][24592] Fps is (10 sec: 47515.4, 60 sec: 42461.9, 300 sec: 43487.1). Total num frames: 1257037824. Throughput: 0: 11010.8. Samples: 64256226. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:53,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:54,866][626795] Updated weights for policy 0, policy_version 153452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:56,484][626795] Updated weights for policy 0, policy_version 153462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:58,224][626795] Updated weights for policy 0, policy_version 153472 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:58,975][24592] Fps is (10 sec: 47514.2, 60 sec: 42598.4, 300 sec: 43487.0). Total num frames: 1257275392. Throughput: 0: 11054.1. Samples: 64292742. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:00:58,976][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:00:59,950][626795] Updated weights for policy 0, policy_version 153482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:01,611][626795] Updated weights for policy 0, policy_version 153492 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:03,362][626795] Updated weights for policy 0, policy_version 153502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:03,976][24592] Fps is (10 sec: 47512.6, 60 sec: 44782.6, 300 sec: 43514.7). Total num frames: 1257512960. Throughput: 0: 11041.1. Samples: 64363890. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:03,978][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:04,010][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000153506_1257521152.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:04,082][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000152237_1247125504.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:05,113][626795] Updated weights for policy 0, policy_version 153512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:06,874][626795] Updated weights for policy 0, policy_version 153522 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:08,586][626795] Updated weights for policy 0, policy_version 153532 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45056.0, 300 sec: 43542.6). Total num frames: 1257750528. Throughput: 0: 10996.0. Samples: 64434894. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:08,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:13,077][626795] Updated weights for policy 0, policy_version 153542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:13,976][24592] Fps is (10 sec: 33588.2, 60 sec: 42734.9, 300 sec: 43098.2). Total num frames: 1257848832. Throughput: 0: 10542.6. Samples: 64450284. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:13,977][24592] Avg episode reward: [(0, '4.319')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:15,045][626795] Updated weights for policy 0, policy_version 153552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:16,768][626795] Updated weights for policy 0, policy_version 153562 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:18,679][626795] Updated weights for policy 0, policy_version 153572 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:18,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42325.3, 300 sec: 43042.7). Total num frames: 1258070016. Throughput: 0: 10176.6. Samples: 64504518. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:18,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:20,523][626795] Updated weights for policy 0, policy_version 153582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:22,251][626795] Updated weights for policy 0, policy_version 153592 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:23,975][24592] Fps is (10 sec: 45056.3, 60 sec: 42188.7, 300 sec: 43042.7). Total num frames: 1258299392. Throughput: 0: 10881.9. Samples: 64573080. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:23,978][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:24,000][626795] Updated weights for policy 0, policy_version 153602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:25,885][626795] Updated weights for policy 0, policy_version 153612 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:27,685][626795] Updated weights for policy 0, policy_version 153622 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:28,975][24592] Fps is (10 sec: 45874.9, 60 sec: 41915.7, 300 sec: 43403.7). Total num frames: 1258528768. Throughput: 0: 10914.1. Samples: 64607670. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:28,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:29,378][626795] Updated weights for policy 0, policy_version 153632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:31,179][626795] Updated weights for policy 0, policy_version 153642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:32,891][626795] Updated weights for policy 0, policy_version 153652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:33,976][24592] Fps is (10 sec: 46690.8, 60 sec: 42052.0, 300 sec: 43432.4). Total num frames: 1258766336. Throughput: 0: 10935.7. Samples: 64677246. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:33,978][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:34,621][626795] Updated weights for policy 0, policy_version 153662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:36,425][626795] Updated weights for policy 0, policy_version 153672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:38,234][626795] Updated weights for policy 0, policy_version 153682 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:38,975][24592] Fps is (10 sec: 46694.7, 60 sec: 44236.8, 300 sec: 43403.7). Total num frames: 1258995712. Throughput: 0: 10896.9. Samples: 64746582. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:38,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:40,023][626795] Updated weights for policy 0, policy_version 153692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:41,685][626795] Updated weights for policy 0, policy_version 153702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:43,492][626795] Updated weights for policy 0, policy_version 153712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:43,977][24592] Fps is (10 sec: 45874.9, 60 sec: 44373.1, 300 sec: 43375.8). Total num frames: 1259225088. Throughput: 0: 10855.8. Samples: 64781262. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:43,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:47,265][626795] Updated weights for policy 0, policy_version 153722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:48,976][24592] Fps is (10 sec: 36863.3, 60 sec: 42734.9, 300 sec: 43070.5). Total num frames: 1259364352. Throughput: 0: 10324.2. Samples: 64828476. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:48,979][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:49,024][626795] Updated weights for policy 0, policy_version 153732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:50,805][626795] Updated weights for policy 0, policy_version 153742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:52,444][626795] Updated weights for policy 0, policy_version 153752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:53,976][24592] Fps is (10 sec: 37685.2, 60 sec: 42734.8, 300 sec: 43070.5). Total num frames: 1259601920. Throughput: 0: 10281.3. Samples: 64897554. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:53,977][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:54,337][626795] Updated weights for policy 0, policy_version 153762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:56,127][626795] Updated weights for policy 0, policy_version 153772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:57,697][626795] Updated weights for policy 0, policy_version 153782 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:58,975][24592] Fps is (10 sec: 47514.3, 60 sec: 42734.9, 300 sec: 43098.3). Total num frames: 1259839488. Throughput: 0: 10724.4. Samples: 64932882. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:01:58,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:01:59,516][626795] Updated weights for policy 0, policy_version 153792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:01,311][626795] Updated weights for policy 0, policy_version 153802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:02,857][626795] Updated weights for policy 0, policy_version 153812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:03,975][24592] Fps is (10 sec: 47515.3, 60 sec: 42735.2, 300 sec: 43514.8). Total num frames: 1260077056. Throughput: 0: 11110.8. Samples: 65004504. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:03,977][24592] Avg episode reward: [(0, '4.399')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:04,617][626795] Updated weights for policy 0, policy_version 153822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:06,413][626795] Updated weights for policy 0, policy_version 153832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:08,118][626795] Updated weights for policy 0, policy_version 153842 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:08,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42598.4, 300 sec: 43514.8). Total num frames: 1260306432. Throughput: 0: 11156.1. Samples: 65075106. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:08,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:09,911][626795] Updated weights for policy 0, policy_version 153852 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:11,630][626795] Updated weights for policy 0, policy_version 153862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:13,419][626795] Updated weights for policy 0, policy_version 153872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:13,976][24592] Fps is (10 sec: 45874.3, 60 sec: 44782.9, 300 sec: 43487.1). Total num frames: 1260535808. Throughput: 0: 11169.6. Samples: 65110302. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:13,978][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:15,300][626795] Updated weights for policy 0, policy_version 153882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:17,044][626795] Updated weights for policy 0, policy_version 153892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:18,887][626795] Updated weights for policy 0, policy_version 153902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44919.4, 300 sec: 43431.5). Total num frames: 1260765184. Throughput: 0: 11143.7. Samples: 65178702. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:18,976][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:23,409][626795] Updated weights for policy 0, policy_version 153912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:23,975][24592] Fps is (10 sec: 33587.7, 60 sec: 42871.5, 300 sec: 43014.9). Total num frames: 1260871680. Throughput: 0: 10425.6. Samples: 65215734. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:23,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:25,203][626795] Updated weights for policy 0, policy_version 153922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:26,988][626795] Updated weights for policy 0, policy_version 153932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:28,767][626795] Updated weights for policy 0, policy_version 153942 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:28,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42734.9, 300 sec: 42987.2). Total num frames: 1261092864. Throughput: 0: 10406.6. Samples: 65249550. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:28,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:30,611][626795] Updated weights for policy 0, policy_version 153952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:32,164][626795] Updated weights for policy 0, policy_version 153962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:33,935][626795] Updated weights for policy 0, policy_version 153972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:33,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42872.0, 300 sec: 43042.8). Total num frames: 1261338624. Throughput: 0: 10903.6. Samples: 65319138. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:33,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:35,695][626795] Updated weights for policy 0, policy_version 153982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:37,397][626795] Updated weights for policy 0, policy_version 153992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:38,976][24592] Fps is (10 sec: 48332.0, 60 sec: 43007.8, 300 sec: 43431.4). Total num frames: 1261576192. Throughput: 0: 10982.6. Samples: 65391768. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:38,976][24592] Avg episode reward: [(0, '4.275')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:39,121][626795] Updated weights for policy 0, policy_version 154002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:40,801][626795] Updated weights for policy 0, policy_version 154012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:42,541][626795] Updated weights for policy 0, policy_version 154022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 43145.2, 300 sec: 43459.3). Total num frames: 1261813760. Throughput: 0: 10976.9. Samples: 65426844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:43,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:44,234][626795] Updated weights for policy 0, policy_version 154032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:45,950][626795] Updated weights for policy 0, policy_version 154042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:47,660][626795] Updated weights for policy 0, policy_version 154052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:48,975][24592] Fps is (10 sec: 47514.3, 60 sec: 44783.0, 300 sec: 43487.1). Total num frames: 1262051328. Throughput: 0: 10973.2. Samples: 65498298. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:48,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:49,496][626795] Updated weights for policy 0, policy_version 154062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:51,248][626795] Updated weights for policy 0, policy_version 154072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:53,082][626795] Updated weights for policy 0, policy_version 154082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44646.6, 300 sec: 43431.5). Total num frames: 1262280704. Throughput: 0: 10943.6. Samples: 65567568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:53,978][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:57,603][626795] Updated weights for policy 0, policy_version 154092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:58,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42325.3, 300 sec: 42959.4). Total num frames: 1262379008. Throughput: 0: 10293.5. Samples: 65573508. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:02:58,976][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:02:59,401][626795] Updated weights for policy 0, policy_version 154102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:01,155][626795] Updated weights for policy 0, policy_version 154112 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:02,940][626795] Updated weights for policy 0, policy_version 154122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:03,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42188.8, 300 sec: 42903.9). Total num frames: 1262608384. Throughput: 0: 10233.9. Samples: 65639226. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:03,977][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000154127_1262608384.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:04,066][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000152863_1252253696.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:04,771][626795] Updated weights for policy 0, policy_version 154132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:06,494][626795] Updated weights for policy 0, policy_version 154142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:08,220][626795] Updated weights for policy 0, policy_version 154152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:08,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42325.3, 300 sec: 42959.4). Total num frames: 1262845952. Throughput: 0: 10960.5. Samples: 65708958. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:08,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:09,953][626795] Updated weights for policy 0, policy_version 154162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:11,638][626795] Updated weights for policy 0, policy_version 154172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:13,407][626795] Updated weights for policy 0, policy_version 154182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42462.0, 300 sec: 43403.7). Total num frames: 1263083520. Throughput: 0: 11005.6. Samples: 65744802. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:13,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:15,031][626795] Updated weights for policy 0, policy_version 154192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:16,740][626795] Updated weights for policy 0, policy_version 154202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:18,483][626795] Updated weights for policy 0, policy_version 154212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:18,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42734.9, 300 sec: 43514.8). Total num frames: 1263329280. Throughput: 0: 11070.3. Samples: 65817300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:18,976][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:20,100][626795] Updated weights for policy 0, policy_version 154222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:21,876][626795] Updated weights for policy 0, policy_version 154232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:23,715][626795] Updated weights for policy 0, policy_version 154242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:23,976][24592] Fps is (10 sec: 47513.3, 60 sec: 44782.9, 300 sec: 43487.0). Total num frames: 1263558656. Throughput: 0: 11034.3. Samples: 65888310. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:23,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:25,446][626795] Updated weights for policy 0, policy_version 154252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:27,165][626795] Updated weights for policy 0, policy_version 154262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:28,935][626795] Updated weights for policy 0, policy_version 154272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:28,975][24592] Fps is (10 sec: 46694.4, 60 sec: 45056.0, 300 sec: 43459.2). Total num frames: 1263796224. Throughput: 0: 11030.9. Samples: 65923236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:28,977][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:33,518][626795] Updated weights for policy 0, policy_version 154282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:33,975][24592] Fps is (10 sec: 32768.2, 60 sec: 42461.8, 300 sec: 42959.4). Total num frames: 1263886336. Throughput: 0: 10268.0. Samples: 65960358. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:33,977][24592] Avg episode reward: [(0, '4.390')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:35,403][626795] Updated weights for policy 0, policy_version 154292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:37,163][626795] Updated weights for policy 0, policy_version 154302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:38,878][626795] Updated weights for policy 0, policy_version 154312 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:38,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42462.0, 300 sec: 42959.5). Total num frames: 1264123904. Throughput: 0: 10232.8. Samples: 66028044. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:38,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:40,686][626795] Updated weights for policy 0, policy_version 154322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:42,527][626795] Updated weights for policy 0, policy_version 154332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:43,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42325.3, 300 sec: 42931.6). Total num frames: 1264353280. Throughput: 0: 10878.4. Samples: 66063036. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:43,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:44,178][626795] Updated weights for policy 0, policy_version 154342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:45,929][626795] Updated weights for policy 0, policy_version 154352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:47,569][626795] Updated weights for policy 0, policy_version 154362 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:48,976][24592] Fps is (10 sec: 46692.9, 60 sec: 42325.2, 300 sec: 43375.9). Total num frames: 1264590848. Throughput: 0: 11008.5. Samples: 66134610. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:48,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:49,349][626795] Updated weights for policy 0, policy_version 154372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:51,024][626795] Updated weights for policy 0, policy_version 154382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:52,800][626795] Updated weights for policy 0, policy_version 154392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:53,976][24592] Fps is (10 sec: 47512.5, 60 sec: 42461.7, 300 sec: 43431.5). Total num frames: 1264828416. Throughput: 0: 11034.1. Samples: 66205494. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:53,977][24592] Avg episode reward: [(0, '4.318')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:54,549][626795] Updated weights for policy 0, policy_version 154402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:56,229][626795] Updated weights for policy 0, policy_version 154412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:58,027][626795] Updated weights for policy 0, policy_version 154422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:58,975][24592] Fps is (10 sec: 47515.0, 60 sec: 44782.9, 300 sec: 43431.5). Total num frames: 1265065984. Throughput: 0: 11016.9. Samples: 66240564. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:03:58,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:03:59,825][626795] Updated weights for policy 0, policy_version 154432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:01,598][626795] Updated weights for policy 0, policy_version 154442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:03,328][626795] Updated weights for policy 0, policy_version 154452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:03,976][24592] Fps is (10 sec: 46693.2, 60 sec: 44782.5, 300 sec: 43403.7). Total num frames: 1265295360. Throughput: 0: 10955.6. Samples: 66310308. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:03,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:07,825][626795] Updated weights for policy 0, policy_version 154462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:08,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42598.4, 300 sec: 42931.6). Total num frames: 1265401856. Throughput: 0: 10207.7. Samples: 66347658. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:08,977][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:09,658][626795] Updated weights for policy 0, policy_version 154472 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:11,548][626795] Updated weights for policy 0, policy_version 154482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:13,330][626795] Updated weights for policy 0, policy_version 154492 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:13,975][24592] Fps is (10 sec: 33589.2, 60 sec: 42461.9, 300 sec: 42903.9). Total num frames: 1265631232. Throughput: 0: 10185.6. Samples: 66381588. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:13,976][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:15,007][626795] Updated weights for policy 0, policy_version 154502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:16,784][626795] Updated weights for policy 0, policy_version 154512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:18,404][626795] Updated weights for policy 0, policy_version 154522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:18,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42325.4, 300 sec: 42931.9). Total num frames: 1265868800. Throughput: 0: 10929.9. Samples: 66452202. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:18,976][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:20,229][626795] Updated weights for policy 0, policy_version 154532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:21,951][626795] Updated weights for policy 0, policy_version 154542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:23,527][626795] Updated weights for policy 0, policy_version 154552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:23,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42325.4, 300 sec: 43348.2). Total num frames: 1266098176. Throughput: 0: 11019.3. Samples: 66523914. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:23,976][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:25,314][626795] Updated weights for policy 0, policy_version 154562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:27,036][626795] Updated weights for policy 0, policy_version 154572 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:28,764][626795] Updated weights for policy 0, policy_version 154582 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:28,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42461.9, 300 sec: 43403.8). Total num frames: 1266343936. Throughput: 0: 11047.2. Samples: 66560160. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:28,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:30,382][626795] Updated weights for policy 0, policy_version 154592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:32,158][626795] Updated weights for policy 0, policy_version 154602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:33,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44783.0, 300 sec: 43403.7). Total num frames: 1266573312. Throughput: 0: 11021.7. Samples: 66630582. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:33,976][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:34,016][626795] Updated weights for policy 0, policy_version 154612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:35,732][626795] Updated weights for policy 0, policy_version 154622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:37,493][626795] Updated weights for policy 0, policy_version 154632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:41,608][24592] Fps is (10 sec: 36962.1, 60 sec: 42900.3, 300 sec: 43020.0). Total num frames: 1266810880. Throughput: 0: 10388.6. Samples: 66700332. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:41,609][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:41,980][626795] Updated weights for policy 0, policy_version 154642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:43,837][626795] Updated weights for policy 0, policy_version 154652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:43,976][24592] Fps is (10 sec: 33585.6, 60 sec: 42598.1, 300 sec: 42903.8). Total num frames: 1266909184. Throughput: 0: 10303.8. Samples: 66704238. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:43,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:45,613][626795] Updated weights for policy 0, policy_version 154662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:47,432][626795] Updated weights for policy 0, policy_version 154672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:48,975][24592] Fps is (10 sec: 45591.3, 60 sec: 42598.6, 300 sec: 42903.9). Total num frames: 1267146752. Throughput: 0: 10262.1. Samples: 66772098. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:48,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:49,102][626795] Updated weights for policy 0, policy_version 154682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:50,815][626795] Updated weights for policy 0, policy_version 154692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:52,524][626795] Updated weights for policy 0, policy_version 154702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:53,975][24592] Fps is (10 sec: 47515.7, 60 sec: 42598.6, 300 sec: 42931.6). Total num frames: 1267384320. Throughput: 0: 11031.1. Samples: 66844056. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:53,977][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:54,182][626795] Updated weights for policy 0, policy_version 154712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:56,001][626795] Updated weights for policy 0, policy_version 154722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:57,618][626795] Updated weights for policy 0, policy_version 154732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:58,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42598.4, 300 sec: 43375.9). Total num frames: 1267621888. Throughput: 0: 11075.5. Samples: 66879984. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:04:58,979][24592] Avg episode reward: [(0, '4.251')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:04:59,370][626795] Updated weights for policy 0, policy_version 154742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:01,060][626795] Updated weights for policy 0, policy_version 154752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:02,784][626795] Updated weights for policy 0, policy_version 154762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:03,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42871.9, 300 sec: 43459.3). Total num frames: 1267867648. Throughput: 0: 11107.0. Samples: 66952020. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:03,977][24592] Avg episode reward: [(0, '4.363')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000154769_1267867648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000153506_1257521152.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:04,489][626795] Updated weights for policy 0, policy_version 154772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:06,362][626795] Updated weights for policy 0, policy_version 154782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:08,203][626795] Updated weights for policy 0, policy_version 154792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44919.5, 300 sec: 43431.5). Total num frames: 1268097024. Throughput: 0: 11044.9. Samples: 67020936. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:08,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:09,813][626795] Updated weights for policy 0, policy_version 154802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:11,708][626795] Updated weights for policy 0, policy_version 154812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:13,429][626795] Updated weights for policy 0, policy_version 154822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:16,583][24592] Fps is (10 sec: 35087.7, 60 sec: 42786.9, 300 sec: 42940.9). Total num frames: 1268310016. Throughput: 0: 10409.5. Samples: 67055730. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:16,584][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:18,023][626795] Updated weights for policy 0, policy_version 154832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:18,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42461.8, 300 sec: 42876.1). Total num frames: 1268416512. Throughput: 0: 10254.0. Samples: 67092012. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:18,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:19,895][626795] Updated weights for policy 0, policy_version 154842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:21,700][626795] Updated weights for policy 0, policy_version 154852 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:23,435][626795] Updated weights for policy 0, policy_version 154862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:23,975][24592] Fps is (10 sec: 46542.9, 60 sec: 42598.5, 300 sec: 42848.3). Total num frames: 1268654080. Throughput: 0: 10882.6. Samples: 67161396. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:23,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:25,062][626795] Updated weights for policy 0, policy_version 154872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:26,710][626795] Updated weights for policy 0, policy_version 154882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:28,526][626795] Updated weights for policy 0, policy_version 154892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:28,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42461.8, 300 sec: 42876.2). Total num frames: 1268891648. Throughput: 0: 10959.4. Samples: 67197408. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:28,977][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:30,163][626795] Updated weights for policy 0, policy_version 154902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:31,908][626795] Updated weights for policy 0, policy_version 154912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:33,600][626795] Updated weights for policy 0, policy_version 154922 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:33,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42734.9, 300 sec: 43375.9). Total num frames: 1269137408. Throughput: 0: 11034.5. Samples: 67268652. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:33,977][24592] Avg episode reward: [(0, '4.391')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:35,338][626795] Updated weights for policy 0, policy_version 154932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:37,055][626795] Updated weights for policy 0, policy_version 154942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:38,763][626795] Updated weights for policy 0, policy_version 154952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:38,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44696.3, 300 sec: 43431.6). Total num frames: 1269374976. Throughput: 0: 11038.5. Samples: 67340790. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:38,978][24592] Avg episode reward: [(0, '4.308')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:40,572][626795] Updated weights for policy 0, policy_version 154962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:42,280][626795] Updated weights for policy 0, policy_version 154972 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:43,975][24592] Fps is (10 sec: 46694.0, 60 sec: 44919.7, 300 sec: 43403.7). Total num frames: 1269604352. Throughput: 0: 11008.2. Samples: 67375356. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:43,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:44,053][626795] Updated weights for policy 0, policy_version 154982 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:45,911][626795] Updated weights for policy 0, policy_version 154992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:47,557][626795] Updated weights for policy 0, policy_version 155002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:51,521][24592] Fps is (10 sec: 35913.1, 60 sec: 42829.1, 300 sec: 42977.3). Total num frames: 1269825536. Throughput: 0: 10367.0. Samples: 67444926. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:51,522][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:52,226][626795] Updated weights for policy 0, policy_version 155012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:53,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42461.8, 300 sec: 42903.9). Total num frames: 1269932032. Throughput: 0: 10223.6. Samples: 67480998. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:53,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:54,170][626795] Updated weights for policy 0, policy_version 155022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:55,654][626772] Signal inference workers to stop experience collection... (1000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:55,658][626772] Signal inference workers to resume experience collection... (1000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:55,668][626795] InferenceWorker_p0-w0: stopping experience collection (1000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:55,675][626795] InferenceWorker_p0-w0: resuming experience collection (1000 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:55,853][626795] Updated weights for policy 0, policy_version 155032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:57,660][626795] Updated weights for policy 0, policy_version 155042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:58,975][24592] Fps is (10 sec: 45058.7, 60 sec: 42325.3, 300 sec: 42876.2). Total num frames: 1270161408. Throughput: 0: 10823.9. Samples: 67514580. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:05:58,978][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:05:59,316][626795] Updated weights for policy 0, policy_version 155052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:01,139][626795] Updated weights for policy 0, policy_version 155062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:02,793][626795] Updated weights for policy 0, policy_version 155072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:03,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42188.8, 300 sec: 42876.1). Total num frames: 1270398976. Throughput: 0: 10982.4. Samples: 67586220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:03,976][24592] Avg episode reward: [(0, '4.399')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:04,492][626795] Updated weights for policy 0, policy_version 155082 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:06,257][626795] Updated weights for policy 0, policy_version 155092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:08,006][626795] Updated weights for policy 0, policy_version 155102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.3, 300 sec: 43348.2). Total num frames: 1270636544. Throughput: 0: 11032.4. Samples: 67657854. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:08,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:09,726][626795] Updated weights for policy 0, policy_version 155112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:11,373][626795] Updated weights for policy 0, policy_version 155122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:13,043][626795] Updated weights for policy 0, policy_version 155132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:13,975][24592] Fps is (10 sec: 47513.1, 60 sec: 44676.4, 300 sec: 43403.7). Total num frames: 1270874112. Throughput: 0: 11028.4. Samples: 67693686. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:13,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:14,885][626795] Updated weights for policy 0, policy_version 155142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:16,596][626795] Updated weights for policy 0, policy_version 155152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:18,429][626795] Updated weights for policy 0, policy_version 155162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:18,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44783.0, 300 sec: 43403.7). Total num frames: 1271103488. Throughput: 0: 11002.0. Samples: 67763742. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:18,976][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:20,250][626795] Updated weights for policy 0, policy_version 155172 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:21,985][626795] Updated weights for policy 0, policy_version 155182 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:26,439][24592] Fps is (10 sec: 36149.2, 60 sec: 42754.0, 300 sec: 43016.7). Total num frames: 1271324672. Throughput: 0: 10349.8. Samples: 67832034. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:26,440][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:26,483][626795] Updated weights for policy 0, policy_version 155192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:28,454][626795] Updated weights for policy 0, policy_version 155202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:28,976][24592] Fps is (10 sec: 32767.1, 60 sec: 42325.2, 300 sec: 42931.7). Total num frames: 1271431168. Throughput: 0: 10225.6. Samples: 67835508. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:28,978][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:30,144][626795] Updated weights for policy 0, policy_version 155212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:31,879][626795] Updated weights for policy 0, policy_version 155222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:33,541][626795] Updated weights for policy 0, policy_version 155232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:33,976][24592] Fps is (10 sec: 45650.0, 60 sec: 42188.1, 300 sec: 42959.3). Total num frames: 1271668736. Throughput: 0: 10815.5. Samples: 67904100. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:33,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:35,299][626795] Updated weights for policy 0, policy_version 155242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:37,021][626795] Updated weights for policy 0, policy_version 155252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:38,723][626795] Updated weights for policy 0, policy_version 155262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:38,975][24592] Fps is (10 sec: 48333.9, 60 sec: 42325.4, 300 sec: 43015.1). Total num frames: 1271914496. Throughput: 0: 11021.8. Samples: 67976976. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:38,977][24592] Avg episode reward: [(0, '4.369')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:40,444][626795] Updated weights for policy 0, policy_version 155272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:42,141][626795] Updated weights for policy 0, policy_version 155282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:43,821][626795] Updated weights for policy 0, policy_version 155292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:43,975][24592] Fps is (10 sec: 48337.5, 60 sec: 42461.9, 300 sec: 43348.2). Total num frames: 1272152064. Throughput: 0: 11058.3. Samples: 68012202. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:43,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:45,477][626795] Updated weights for policy 0, policy_version 155302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:47,233][626795] Updated weights for policy 0, policy_version 155312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:48,913][626795] Updated weights for policy 0, policy_version 155322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:48,975][24592] Fps is (10 sec: 48332.5, 60 sec: 44771.1, 300 sec: 43376.0). Total num frames: 1272397824. Throughput: 0: 11071.3. Samples: 68084430. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:48,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:50,683][626795] Updated weights for policy 0, policy_version 155332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:52,464][626795] Updated weights for policy 0, policy_version 155342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44919.5, 300 sec: 43348.2). Total num frames: 1272627200. Throughput: 0: 11043.3. Samples: 68154804. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:06:53,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:54,244][626795] Updated weights for policy 0, policy_version 155352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:56,009][626795] Updated weights for policy 0, policy_version 155362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:06:57,770][626795] Updated weights for policy 0, policy_version 155372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:01,347][24592] Fps is (10 sec: 35758.0, 60 sec: 42949.0, 300 sec: 42919.9). Total num frames: 1272840192. Throughput: 0: 10464.0. Samples: 68189376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:01,348][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:02,458][626795] Updated weights for policy 0, policy_version 155382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:03,976][24592] Fps is (10 sec: 32765.1, 60 sec: 42597.7, 300 sec: 42876.0). Total num frames: 1272954880. Throughput: 0: 10256.4. Samples: 68225292. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:03,978][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000155390_1272954880.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000154127_1262608384.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:04,313][626795] Updated weights for policy 0, policy_version 155392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:06,093][626795] Updated weights for policy 0, policy_version 155402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:07,748][626795] Updated weights for policy 0, policy_version 155412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:08,975][24592] Fps is (10 sec: 45100.2, 60 sec: 42461.8, 300 sec: 42876.1). Total num frames: 1273184256. Throughput: 0: 10864.5. Samples: 68294166. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:08,976][24592] Avg episode reward: [(0, '4.284')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:09,570][626795] Updated weights for policy 0, policy_version 155422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:11,288][626795] Updated weights for policy 0, policy_version 155432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:12,932][626795] Updated weights for policy 0, policy_version 155442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:13,975][24592] Fps is (10 sec: 46698.7, 60 sec: 42461.9, 300 sec: 42903.9). Total num frames: 1273421824. Throughput: 0: 11005.4. Samples: 68330748. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:13,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:14,701][626795] Updated weights for policy 0, policy_version 155452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:16,326][626795] Updated weights for policy 0, policy_version 155462 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:18,092][626795] Updated weights for policy 0, policy_version 155472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:18,976][24592] Fps is (10 sec: 48331.6, 60 sec: 42734.7, 300 sec: 43375.9). Total num frames: 1273667584. Throughput: 0: 11070.4. Samples: 68402262. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:18,978][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:19,814][626795] Updated weights for policy 0, policy_version 155482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:21,414][626795] Updated weights for policy 0, policy_version 155492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:23,178][626795] Updated weights for policy 0, policy_version 155502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:23,976][24592] Fps is (10 sec: 48329.7, 60 sec: 44849.3, 300 sec: 43431.4). Total num frames: 1273905152. Throughput: 0: 11055.0. Samples: 68474460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:23,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:24,919][626795] Updated weights for policy 0, policy_version 155512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:26,621][626795] Updated weights for policy 0, policy_version 155522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:28,384][626795] Updated weights for policy 0, policy_version 155532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:28,975][24592] Fps is (10 sec: 47515.2, 60 sec: 45192.7, 300 sec: 43403.7). Total num frames: 1274142720. Throughput: 0: 11059.2. Samples: 68509866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:28,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:30,152][626795] Updated weights for policy 0, policy_version 155542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:32,015][626795] Updated weights for policy 0, policy_version 155552 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:36,263][24592] Fps is (10 sec: 36001.8, 60 sec: 43007.1, 300 sec: 42959.5). Total num frames: 1274347520. Throughput: 0: 10661.3. Samples: 68588580. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:36,265][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:36,610][626795] Updated weights for policy 0, policy_version 155562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:38,457][626795] Updated weights for policy 0, policy_version 155572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:38,977][24592] Fps is (10 sec: 31941.9, 60 sec: 42460.3, 300 sec: 42875.8). Total num frames: 1274462208. Throughput: 0: 10189.7. Samples: 68613360. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:38,978][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:40,334][626795] Updated weights for policy 0, policy_version 155582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:42,088][626795] Updated weights for policy 0, policy_version 155592 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:43,924][626795] Updated weights for policy 0, policy_version 155602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:43,975][24592] Fps is (10 sec: 45676.2, 60 sec: 42461.8, 300 sec: 42876.1). Total num frames: 1274699776. Throughput: 0: 10748.1. Samples: 68647554. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:43,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:45,440][626795] Updated weights for policy 0, policy_version 155612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:47,188][626795] Updated weights for policy 0, policy_version 155622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:48,887][626795] Updated weights for policy 0, policy_version 155632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:48,975][24592] Fps is (10 sec: 47523.8, 60 sec: 42325.4, 300 sec: 42903.9). Total num frames: 1274937344. Throughput: 0: 10992.8. Samples: 68719956. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:48,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:50,610][626795] Updated weights for policy 0, policy_version 155642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:52,322][626795] Updated weights for policy 0, policy_version 155652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:53,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42461.9, 300 sec: 43375.9). Total num frames: 1275174912. Throughput: 0: 11065.5. Samples: 68792112. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:53,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:54,000][626795] Updated weights for policy 0, policy_version 155662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:55,726][626795] Updated weights for policy 0, policy_version 155672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:57,515][626795] Updated weights for policy 0, policy_version 155682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:58,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44635.5, 300 sec: 43403.7). Total num frames: 1275412480. Throughput: 0: 11048.3. Samples: 68827920. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:07:58,976][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:07:59,145][626795] Updated weights for policy 0, policy_version 155692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:00,846][626795] Updated weights for policy 0, policy_version 155702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:02,561][626795] Updated weights for policy 0, policy_version 155712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:03,976][24592] Fps is (10 sec: 47509.7, 60 sec: 44919.5, 300 sec: 43403.6). Total num frames: 1275650048. Throughput: 0: 11046.1. Samples: 68899344. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:03,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:04,333][626795] Updated weights for policy 0, policy_version 155722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:06,234][626795] Updated weights for policy 0, policy_version 155732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:07,942][626795] Updated weights for policy 0, policy_version 155742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:11,157][24592] Fps is (10 sec: 36315.0, 60 sec: 42948.5, 300 sec: 42974.9). Total num frames: 1275854848. Throughput: 0: 9750.7. Samples: 68934504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:11,158][24592] Avg episode reward: [(0, '4.279')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:12,633][626795] Updated weights for policy 0, policy_version 155752 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:13,976][24592] Fps is (10 sec: 32770.5, 60 sec: 42598.3, 300 sec: 42876.1). Total num frames: 1275977728. Throughput: 0: 10235.9. Samples: 68970480. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:13,979][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:14,572][626795] Updated weights for policy 0, policy_version 155762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:16,342][626795] Updated weights for policy 0, policy_version 155772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:18,060][626795] Updated weights for policy 0, policy_version 155782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:18,976][24592] Fps is (10 sec: 45048.8, 60 sec: 42325.0, 300 sec: 42876.0). Total num frames: 1276207104. Throughput: 0: 10504.6. Samples: 69037260. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:18,978][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:19,744][626795] Updated weights for policy 0, policy_version 155792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:21,550][626795] Updated weights for policy 0, policy_version 155802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:23,207][626795] Updated weights for policy 0, policy_version 155812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:23,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42325.7, 300 sec: 42876.1). Total num frames: 1276444672. Throughput: 0: 11011.3. Samples: 69108846. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:23,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:24,886][626795] Updated weights for policy 0, policy_version 155822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:26,574][626795] Updated weights for policy 0, policy_version 155832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:28,267][626795] Updated weights for policy 0, policy_version 155842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:28,976][24592] Fps is (10 sec: 48333.4, 60 sec: 42461.4, 300 sec: 43403.6). Total num frames: 1276690432. Throughput: 0: 11057.6. Samples: 69145152. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:28,978][24592] Avg episode reward: [(0, '4.297')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:29,983][626795] Updated weights for policy 0, policy_version 155852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:31,730][626795] Updated weights for policy 0, policy_version 155862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:33,398][626795] Updated weights for policy 0, policy_version 155872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:33,977][24592] Fps is (10 sec: 48326.6, 60 sec: 44711.9, 300 sec: 43403.5). Total num frames: 1276928000. Throughput: 0: 11050.0. Samples: 69217224. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:33,977][24592] Avg episode reward: [(0, '4.240')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:35,133][626795] Updated weights for policy 0, policy_version 155882 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:36,815][626795] Updated weights for policy 0, policy_version 155892 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:38,557][626795] Updated weights for policy 0, policy_version 155902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:38,975][24592] Fps is (10 sec: 47516.9, 60 sec: 45057.6, 300 sec: 43431.5). Total num frames: 1277165568. Throughput: 0: 11046.7. Samples: 69289212. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:38,976][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:40,295][626795] Updated weights for policy 0, policy_version 155912 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:42,306][626795] Updated weights for policy 0, policy_version 155922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:46,072][24592] Fps is (10 sec: 35218.7, 60 sec: 42743.1, 300 sec: 42959.6). Total num frames: 1277353984. Throughput: 0: 10493.3. Samples: 69322122. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:46,073][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:46,904][626795] Updated weights for policy 0, policy_version 155932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:48,805][626795] Updated weights for policy 0, policy_version 155942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:48,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42325.3, 300 sec: 42876.1). Total num frames: 1277476864. Throughput: 0: 10180.3. Samples: 69357450. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:48,976][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:50,803][626795] Updated weights for policy 0, policy_version 155952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:52,586][626795] Updated weights for policy 0, policy_version 155962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:53,976][24592] Fps is (10 sec: 43532.0, 60 sec: 42051.9, 300 sec: 42820.5). Total num frames: 1277698048. Throughput: 0: 11428.8. Samples: 69423876. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:53,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:54,298][626795] Updated weights for policy 0, policy_version 155972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:56,139][626795] Updated weights for policy 0, policy_version 155982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:57,822][626795] Updated weights for policy 0, policy_version 155992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:58,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42052.3, 300 sec: 42848.4). Total num frames: 1277935616. Throughput: 0: 10834.0. Samples: 69458010. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:08:58,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:08:59,653][626795] Updated weights for policy 0, policy_version 156002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:01,407][626795] Updated weights for policy 0, policy_version 156012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:03,152][626795] Updated weights for policy 0, policy_version 156022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:03,975][24592] Fps is (10 sec: 46696.8, 60 sec: 41916.3, 300 sec: 43264.9). Total num frames: 1278164992. Throughput: 0: 10891.5. Samples: 69527370. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:03,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000156026_1278164992.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:04,052][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000154769_1267867648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:05,011][626795] Updated weights for policy 0, policy_version 156032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:06,774][626795] Updated weights for policy 0, policy_version 156042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:08,500][626795] Updated weights for policy 0, policy_version 156052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:08,975][24592] Fps is (10 sec: 45055.5, 60 sec: 43780.4, 300 sec: 43237.1). Total num frames: 1278386176. Throughput: 0: 10822.0. Samples: 69595836. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:08,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:10,396][626795] Updated weights for policy 0, policy_version 156062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:12,121][626795] Updated weights for policy 0, policy_version 156072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:13,855][626795] Updated weights for policy 0, policy_version 156082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:13,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44100.3, 300 sec: 43237.1). Total num frames: 1278623744. Throughput: 0: 10791.1. Samples: 69630744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:13,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:15,709][626795] Updated weights for policy 0, policy_version 156092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:17,421][626795] Updated weights for policy 0, policy_version 156102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:20,138][24592] Fps is (10 sec: 39630.4, 60 sec: 42860.9, 300 sec: 42984.4). Total num frames: 1278828544. Throughput: 0: 10455.6. Samples: 69699864. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:20,139][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:21,130][626795] Updated weights for policy 0, policy_version 156112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:22,873][626795] Updated weights for policy 0, policy_version 156122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:23,975][24592] Fps is (10 sec: 37683.0, 60 sec: 42598.5, 300 sec: 42903.9). Total num frames: 1279000576. Throughput: 0: 10196.3. Samples: 69748044. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:23,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:24,572][626795] Updated weights for policy 0, policy_version 156132 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:26,327][626795] Updated weights for policy 0, policy_version 156142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:28,038][626795] Updated weights for policy 0, policy_version 156152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:28,975][24592] Fps is (10 sec: 46347.9, 60 sec: 42462.4, 300 sec: 42931.6). Total num frames: 1279238144. Throughput: 0: 10749.0. Samples: 69783288. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:28,993][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:29,738][626795] Updated weights for policy 0, policy_version 156162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:31,412][626795] Updated weights for policy 0, policy_version 156172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:33,150][626795] Updated weights for policy 0, policy_version 156182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:33,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42462.8, 300 sec: 43318.3). Total num frames: 1279475712. Throughput: 0: 11066.2. Samples: 69855432. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:33,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:34,891][626795] Updated weights for policy 0, policy_version 156192 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:36,768][626795] Updated weights for policy 0, policy_version 156202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:38,430][626795] Updated weights for policy 0, policy_version 156212 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:38,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42461.9, 300 sec: 43403.8). Total num frames: 1279713280. Throughput: 0: 11139.3. Samples: 69925140. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:38,976][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:40,183][626795] Updated weights for policy 0, policy_version 156222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:42,017][626795] Updated weights for policy 0, policy_version 156232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:43,781][626795] Updated weights for policy 0, policy_version 156242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:43,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44706.9, 300 sec: 43376.0). Total num frames: 1279942656. Throughput: 0: 11155.1. Samples: 69959988. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:43,977][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:45,506][626795] Updated weights for policy 0, policy_version 156252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:47,274][626795] Updated weights for policy 0, policy_version 156262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:48,977][24592] Fps is (10 sec: 45867.6, 60 sec: 44918.2, 300 sec: 43347.9). Total num frames: 1280172032. Throughput: 0: 11156.4. Samples: 70029426. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:48,978][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:49,032][626795] Updated weights for policy 0, policy_version 156272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:50,870][626795] Updated weights for policy 0, policy_version 156282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:54,848][24592] Fps is (10 sec: 35410.8, 60 sec: 43199.0, 300 sec: 42943.4). Total num frames: 1280327680. Throughput: 0: 10195.7. Samples: 70063542. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:54,850][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:55,166][626795] Updated weights for policy 0, policy_version 156292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:57,026][626795] Updated weights for policy 0, policy_version 156302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:09:58,893][626795] Updated weights for policy 0, policy_version 156312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:58,976][24592] Fps is (10 sec: 33592.3, 60 sec: 42871.3, 300 sec: 42848.3). Total num frames: 1280507904. Throughput: 0: 10486.6. Samples: 70102644. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:09:58,978][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:00,653][626795] Updated weights for policy 0, policy_version 156322 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:02,445][626795] Updated weights for policy 0, policy_version 156332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:03,975][24592] Fps is (10 sec: 44878.0, 60 sec: 42871.5, 300 sec: 42848.3). Total num frames: 1280737280. Throughput: 0: 10737.1. Samples: 70170552. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:03,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:04,254][626795] Updated weights for policy 0, policy_version 156342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:06,061][626795] Updated weights for policy 0, policy_version 156352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:07,750][626795] Updated weights for policy 0, policy_version 156362 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:08,975][24592] Fps is (10 sec: 46695.2, 60 sec: 43144.6, 300 sec: 43314.5). Total num frames: 1280974848. Throughput: 0: 10934.7. Samples: 70240104. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:08,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:09,570][626795] Updated weights for policy 0, policy_version 156372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:11,303][626795] Updated weights for policy 0, policy_version 156382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:13,113][626795] Updated weights for policy 0, policy_version 156392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:13,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42871.4, 300 sec: 43320.4). Total num frames: 1281196032. Throughput: 0: 10926.0. Samples: 70274958. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:13,978][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:14,799][626795] Updated weights for policy 0, policy_version 156402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:16,616][626795] Updated weights for policy 0, policy_version 156412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:18,346][626795] Updated weights for policy 0, policy_version 156422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44275.4, 300 sec: 43320.4). Total num frames: 1281433600. Throughput: 0: 10878.3. Samples: 70344954. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:18,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:20,139][626795] Updated weights for policy 0, policy_version 156432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:21,800][626795] Updated weights for policy 0, policy_version 156442 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:23,584][626795] Updated weights for policy 0, policy_version 156452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:23,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44509.9, 300 sec: 43320.4). Total num frames: 1281671168. Throughput: 0: 10892.4. Samples: 70415298. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:23,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:25,417][626795] Updated weights for policy 0, policy_version 156462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:29,472][24592] Fps is (10 sec: 35901.8, 60 sec: 42519.8, 300 sec: 42887.3). Total num frames: 1281810432. Throughput: 0: 10758.2. Samples: 70449444. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:29,473][24592] Avg episode reward: [(0, '4.328')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:29,545][626795] Updated weights for policy 0, policy_version 156472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:31,304][626795] Updated weights for policy 0, policy_version 156482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:33,063][626795] Updated weights for policy 0, policy_version 156492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:33,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42461.9, 300 sec: 42876.1). Total num frames: 1282023424. Throughput: 0: 10268.6. Samples: 70491498. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:33,977][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:34,873][626795] Updated weights for policy 0, policy_version 156502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:36,631][626795] Updated weights for policy 0, policy_version 156512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:38,384][626795] Updated weights for policy 0, policy_version 156522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:38,975][24592] Fps is (10 sec: 45684.4, 60 sec: 42188.8, 300 sec: 42848.3). Total num frames: 1282244608. Throughput: 0: 11266.9. Samples: 70560714. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:38,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:40,186][626795] Updated weights for policy 0, policy_version 156532 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:41,893][626795] Updated weights for policy 0, policy_version 156542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:43,666][626795] Updated weights for policy 0, policy_version 156552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:43,976][24592] Fps is (10 sec: 45873.8, 60 sec: 42325.1, 300 sec: 43277.3). Total num frames: 1282482176. Throughput: 0: 10945.8. Samples: 70595208. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:43,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:45,540][626795] Updated weights for policy 0, policy_version 156562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:47,427][626795] Updated weights for policy 0, policy_version 156572 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:48,976][24592] Fps is (10 sec: 45874.3, 60 sec: 42189.8, 300 sec: 43292.6). Total num frames: 1282703360. Throughput: 0: 10946.9. Samples: 70663164. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:48,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:49,198][626795] Updated weights for policy 0, policy_version 156582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:50,991][626795] Updated weights for policy 0, policy_version 156592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:52,728][626795] Updated weights for policy 0, policy_version 156602 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:53,975][24592] Fps is (10 sec: 45876.6, 60 sec: 44197.3, 300 sec: 43320.4). Total num frames: 1282940928. Throughput: 0: 10931.9. Samples: 70732038. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:53,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:54,459][626795] Updated weights for policy 0, policy_version 156612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:56,256][626795] Updated weights for policy 0, policy_version 156622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:57,962][626795] Updated weights for policy 0, policy_version 156632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:58,975][24592] Fps is (10 sec: 46695.4, 60 sec: 44373.5, 300 sec: 43292.6). Total num frames: 1283170304. Throughput: 0: 10935.3. Samples: 70767048. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:10:58,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:10:59,803][626795] Updated weights for policy 0, policy_version 156642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:01,578][626795] Updated weights for policy 0, policy_version 156652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:04,172][24592] Fps is (10 sec: 34547.3, 60 sec: 42459.5, 300 sec: 42875.3). Total num frames: 1283293184. Throughput: 0: 10111.1. Samples: 70801938. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:04,172][24592] Avg episode reward: [(0, '4.157')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:04,182][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000156653_1283301376.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:04,246][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000155390_1272954880.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:05,807][626795] Updated weights for policy 0, policy_version 156662 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:07,623][626795] Updated weights for policy 0, policy_version 156672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:08,976][24592] Fps is (10 sec: 34405.9, 60 sec: 42325.2, 300 sec: 42848.3). Total num frames: 1283514368. Throughput: 0: 10259.7. Samples: 70876986. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:08,977][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:09,315][626795] Updated weights for policy 0, policy_version 156682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:11,160][626795] Updated weights for policy 0, policy_version 156692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:12,990][626795] Updated weights for policy 0, policy_version 156702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:13,975][24592] Fps is (10 sec: 45958.3, 60 sec: 42461.9, 300 sec: 42848.3). Total num frames: 1283743744. Throughput: 0: 10369.7. Samples: 70910934. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:13,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:14,739][626795] Updated weights for policy 0, policy_version 156712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:16,509][626795] Updated weights for policy 0, policy_version 156722 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:18,302][626795] Updated weights for policy 0, policy_version 156732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:18,976][24592] Fps is (10 sec: 45874.1, 60 sec: 42325.1, 300 sec: 43237.2). Total num frames: 1283973120. Throughput: 0: 10860.4. Samples: 70980222. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:18,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:20,027][626795] Updated weights for policy 0, policy_version 156742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:21,797][626795] Updated weights for policy 0, policy_version 156752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:23,584][626795] Updated weights for policy 0, policy_version 156762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:23,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42325.3, 300 sec: 43320.4). Total num frames: 1284210688. Throughput: 0: 10888.7. Samples: 71050704. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:23,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:25,181][626795] Updated weights for policy 0, policy_version 156772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:27,058][626795] Updated weights for policy 0, policy_version 156782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:28,807][626795] Updated weights for policy 0, policy_version 156792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:28,975][24592] Fps is (10 sec: 46696.0, 60 sec: 44192.6, 300 sec: 43292.8). Total num frames: 1284440064. Throughput: 0: 10901.4. Samples: 71085768. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:28,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:30,509][626795] Updated weights for policy 0, policy_version 156802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:32,327][626795] Updated weights for policy 0, policy_version 156812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:33,975][24592] Fps is (10 sec: 46694.1, 60 sec: 44236.7, 300 sec: 43264.9). Total num frames: 1284677632. Throughput: 0: 10937.9. Samples: 71155368. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:33,978][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:34,090][626795] Updated weights for policy 0, policy_version 156822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:35,941][626795] Updated weights for policy 0, policy_version 156832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:38,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42461.9, 300 sec: 42848.3). Total num frames: 1284792320. Throughput: 0: 10388.7. Samples: 71199528. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:38,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:40,080][626795] Updated weights for policy 0, policy_version 156842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:41,920][626795] Updated weights for policy 0, policy_version 156852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:43,749][626795] Updated weights for policy 0, policy_version 156862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:43,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42325.5, 300 sec: 42792.8). Total num frames: 1285021696. Throughput: 0: 10297.1. Samples: 71230416. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:43,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:45,399][626795] Updated weights for policy 0, policy_version 156872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:47,227][626795] Updated weights for policy 0, policy_version 156882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:48,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42462.0, 300 sec: 42792.8). Total num frames: 1285251072. Throughput: 0: 11117.0. Samples: 71300022. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:48,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:49,077][626795] Updated weights for policy 0, policy_version 156892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:50,757][626795] Updated weights for policy 0, policy_version 156902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:52,677][626795] Updated weights for policy 0, policy_version 156912 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:53,976][24592] Fps is (10 sec: 45874.3, 60 sec: 42325.2, 300 sec: 43195.5). Total num frames: 1285480448. Throughput: 0: 10937.0. Samples: 71369154. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:53,978][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:54,257][626795] Updated weights for policy 0, policy_version 156922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:56,104][626795] Updated weights for policy 0, policy_version 156932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:57,799][626795] Updated weights for policy 0, policy_version 156942 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:58,976][24592] Fps is (10 sec: 46691.2, 60 sec: 42461.4, 300 sec: 43264.9). Total num frames: 1285718016. Throughput: 0: 10963.3. Samples: 71404290. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:11:58,977][24592] Avg episode reward: [(0, '4.345')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:11:59,622][626795] Updated weights for policy 0, policy_version 156952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:01,475][626795] Updated weights for policy 0, policy_version 156962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:03,234][626795] Updated weights for policy 0, policy_version 156972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,248][626772] Signal inference workers to stop experience collection... (1050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,249][626772] Signal inference workers to resume experience collection... (1050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,257][626795] InferenceWorker_p0-w0: stopping experience collection (1050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,261][626795] InferenceWorker_p0-w0: resuming experience collection (1050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,975][24592] Fps is (10 sec: 47514.7, 60 sec: 44519.0, 300 sec: 43292.6). Total num frames: 1285955584. Throughput: 0: 10962.8. Samples: 71473542. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:03,978][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:04,883][626795] Updated weights for policy 0, policy_version 156982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:06,652][626795] Updated weights for policy 0, policy_version 156992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:08,421][626795] Updated weights for policy 0, policy_version 157002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:08,975][24592] Fps is (10 sec: 45878.3, 60 sec: 44373.4, 300 sec: 43237.1). Total num frames: 1286176768. Throughput: 0: 10945.5. Samples: 71543250. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:08,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:10,283][626795] Updated weights for policy 0, policy_version 157012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:13,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42598.4, 300 sec: 42820.6). Total num frames: 1286299648. Throughput: 0: 10799.9. Samples: 71571762. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:13,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:14,450][626795] Updated weights for policy 0, policy_version 157022 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:16,261][626795] Updated weights for policy 0, policy_version 157032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:18,019][626795] Updated weights for policy 0, policy_version 157042 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:18,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42462.1, 300 sec: 42765.1). Total num frames: 1286520832. Throughput: 0: 10288.8. Samples: 71618364. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:18,976][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:19,850][626795] Updated weights for policy 0, policy_version 157052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:21,539][626795] Updated weights for policy 0, policy_version 157062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:23,352][626795] Updated weights for policy 0, policy_version 157072 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:23,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42461.9, 300 sec: 42765.0). Total num frames: 1286758400. Throughput: 0: 10837.5. Samples: 71687214. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:23,976][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:25,066][626795] Updated weights for policy 0, policy_version 157082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:26,894][626795] Updated weights for policy 0, policy_version 157092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:28,753][626795] Updated weights for policy 0, policy_version 157102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:28,976][24592] Fps is (10 sec: 46692.5, 60 sec: 42461.6, 300 sec: 43183.2). Total num frames: 1286987776. Throughput: 0: 10930.1. Samples: 71722272. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:28,977][24592] Avg episode reward: [(0, '4.326')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:30,476][626795] Updated weights for policy 0, policy_version 157112 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:32,133][626795] Updated weights for policy 0, policy_version 157122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:33,918][626795] Updated weights for policy 0, policy_version 157132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:33,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42461.9, 300 sec: 43265.2). Total num frames: 1287225344. Throughput: 0: 10925.2. Samples: 71791656. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:33,976][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:35,729][626795] Updated weights for policy 0, policy_version 157142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:37,543][626795] Updated weights for policy 0, policy_version 157152 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:38,975][24592] Fps is (10 sec: 45877.3, 60 sec: 44236.8, 300 sec: 43209.3). Total num frames: 1287446528. Throughput: 0: 10934.1. Samples: 71861184. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:38,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:39,253][626795] Updated weights for policy 0, policy_version 157162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:41,039][626795] Updated weights for policy 0, policy_version 157172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:42,791][626795] Updated weights for policy 0, policy_version 157182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:43,976][24592] Fps is (10 sec: 45872.3, 60 sec: 44372.9, 300 sec: 43209.2). Total num frames: 1287684096. Throughput: 0: 10929.3. Samples: 71896110. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:43,977][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:44,608][626795] Updated weights for policy 0, policy_version 157192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:48,797][626795] Updated weights for policy 0, policy_version 157202 (0.2469)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:48,975][24592] Fps is (10 sec: 35225.3, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1287798784. Throughput: 0: 10477.5. Samples: 71945028. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:48,976][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:50,707][626795] Updated weights for policy 0, policy_version 157212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:52,338][626795] Updated weights for policy 0, policy_version 157222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:53,975][24592] Fps is (10 sec: 35227.9, 60 sec: 42598.5, 300 sec: 42792.8). Total num frames: 1288036352. Throughput: 0: 10285.2. Samples: 72006084. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:53,976][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:54,164][626795] Updated weights for policy 0, policy_version 157232 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:56,010][626795] Updated weights for policy 0, policy_version 157242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:57,798][626795] Updated weights for policy 0, policy_version 157252 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:58,975][24592] Fps is (10 sec: 45874.7, 60 sec: 42325.7, 300 sec: 42737.4). Total num frames: 1288257536. Throughput: 0: 10408.9. Samples: 72040164. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:12:58,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:12:59,436][626795] Updated weights for policy 0, policy_version 157262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:01,303][626795] Updated weights for policy 0, policy_version 157272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:03,086][626795] Updated weights for policy 0, policy_version 157282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:03,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42325.3, 300 sec: 43167.5). Total num frames: 1288495104. Throughput: 0: 10916.0. Samples: 72109584. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:03,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000157287_1288495104.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000156026_1278164992.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:04,804][626795] Updated weights for policy 0, policy_version 157292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:06,648][626795] Updated weights for policy 0, policy_version 157302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:08,374][626795] Updated weights for policy 0, policy_version 157312 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:08,976][24592] Fps is (10 sec: 46693.5, 60 sec: 42461.7, 300 sec: 43209.3). Total num frames: 1288724480. Throughput: 0: 10923.5. Samples: 72178776. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:08,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:10,172][626795] Updated weights for policy 0, policy_version 157322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:12,024][626795] Updated weights for policy 0, policy_version 157332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:13,674][626795] Updated weights for policy 0, policy_version 157342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:13,976][24592] Fps is (10 sec: 45874.8, 60 sec: 44236.7, 300 sec: 43209.4). Total num frames: 1288953856. Throughput: 0: 10915.7. Samples: 72213474. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:13,976][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:15,430][626795] Updated weights for policy 0, policy_version 157352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:17,274][626795] Updated weights for policy 0, policy_version 157362 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:18,976][24592] Fps is (10 sec: 45875.5, 60 sec: 44373.1, 300 sec: 43181.5). Total num frames: 1289183232. Throughput: 0: 10913.0. Samples: 72282744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:18,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:18,985][626795] Updated weights for policy 0, policy_version 157372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:23,302][626795] Updated weights for policy 0, policy_version 157382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:23,976][24592] Fps is (10 sec: 34405.9, 60 sec: 42325.2, 300 sec: 42737.3). Total num frames: 1289297920. Throughput: 0: 10266.9. Samples: 72323196. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:23,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:25,003][626795] Updated weights for policy 0, policy_version 157392 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:26,939][626795] Updated weights for policy 0, policy_version 157402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:28,585][626795] Updated weights for policy 0, policy_version 157412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:28,975][24592] Fps is (10 sec: 35226.3, 60 sec: 42462.1, 300 sec: 42737.5). Total num frames: 1289535488. Throughput: 0: 10259.7. Samples: 72357792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:28,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:30,331][626795] Updated weights for policy 0, policy_version 157422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:32,130][626795] Updated weights for policy 0, policy_version 157432 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:33,921][626795] Updated weights for policy 0, policy_version 157442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:33,975][24592] Fps is (10 sec: 46695.3, 60 sec: 42325.3, 300 sec: 42709.5). Total num frames: 1289764864. Throughput: 0: 10707.5. Samples: 72426864. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:33,977][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:35,780][626795] Updated weights for policy 0, policy_version 157452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:37,495][626795] Updated weights for policy 0, policy_version 157462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:38,975][24592] Fps is (10 sec: 45875.4, 60 sec: 42461.8, 300 sec: 43155.1). Total num frames: 1289994240. Throughput: 0: 10905.1. Samples: 72496812. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:38,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:39,214][626795] Updated weights for policy 0, policy_version 157472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:40,995][626795] Updated weights for policy 0, policy_version 157482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:42,651][626795] Updated weights for policy 0, policy_version 157492 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:43,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42462.3, 300 sec: 43237.1). Total num frames: 1290231808. Throughput: 0: 10924.8. Samples: 72531780. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:43,977][24592] Avg episode reward: [(0, '4.441')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:44,480][626795] Updated weights for policy 0, policy_version 157502 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:46,273][626795] Updated weights for policy 0, policy_version 157512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:48,002][626795] Updated weights for policy 0, policy_version 157522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:48,976][24592] Fps is (10 sec: 45874.3, 60 sec: 44236.7, 300 sec: 43237.2). Total num frames: 1290452992. Throughput: 0: 10923.7. Samples: 72601152. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:48,976][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:49,807][626795] Updated weights for policy 0, policy_version 157532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:51,606][626795] Updated weights for policy 0, policy_version 157542 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:53,337][626795] Updated weights for policy 0, policy_version 157552 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:53,976][24592] Fps is (10 sec: 45872.9, 60 sec: 44236.5, 300 sec: 43237.0). Total num frames: 1290690560. Throughput: 0: 10932.5. Samples: 72670740. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:53,978][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:55,191][626795] Updated weights for policy 0, policy_version 157562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:58,976][24592] Fps is (10 sec: 35226.2, 60 sec: 42461.9, 300 sec: 42848.3). Total num frames: 1290805248. Throughput: 0: 10567.1. Samples: 72688992. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:13:58,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:13:59,421][626795] Updated weights for policy 0, policy_version 157572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:01,259][626795] Updated weights for policy 0, policy_version 157582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:02,973][626795] Updated weights for policy 0, policy_version 157592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:03,975][24592] Fps is (10 sec: 34407.6, 60 sec: 42325.2, 300 sec: 42876.1). Total num frames: 1291034624. Throughput: 0: 10267.8. Samples: 72744792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:03,977][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:04,840][626795] Updated weights for policy 0, policy_version 157602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:06,622][626795] Updated weights for policy 0, policy_version 157612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:08,343][626795] Updated weights for policy 0, policy_version 157622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:08,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42325.6, 300 sec: 42848.3). Total num frames: 1291264000. Throughput: 0: 10904.9. Samples: 72813912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:08,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:10,030][626795] Updated weights for policy 0, policy_version 157632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:11,872][626795] Updated weights for policy 0, policy_version 157642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:13,538][626795] Updated weights for policy 0, policy_version 157652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:13,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42461.8, 300 sec: 43129.3). Total num frames: 1291501568. Throughput: 0: 10919.2. Samples: 72849156. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:13,977][24592] Avg episode reward: [(0, '4.375')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:15,369][626795] Updated weights for policy 0, policy_version 157662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:17,075][626795] Updated weights for policy 0, policy_version 157672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:18,887][626795] Updated weights for policy 0, policy_version 157682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:18,976][24592] Fps is (10 sec: 46693.4, 60 sec: 42461.9, 300 sec: 43153.8). Total num frames: 1291730944. Throughput: 0: 10937.7. Samples: 72919062. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:18,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:20,705][626795] Updated weights for policy 0, policy_version 157692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:22,335][626795] Updated weights for policy 0, policy_version 157702 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:23,975][24592] Fps is (10 sec: 45875.8, 60 sec: 44373.5, 300 sec: 43126.0). Total num frames: 1291960320. Throughput: 0: 10946.4. Samples: 72989400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:23,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:24,139][626795] Updated weights for policy 0, policy_version 157712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:25,921][626795] Updated weights for policy 0, policy_version 157722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:27,582][626795] Updated weights for policy 0, policy_version 157732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:28,976][24592] Fps is (10 sec: 46693.8, 60 sec: 44373.1, 300 sec: 43126.0). Total num frames: 1292197888. Throughput: 0: 10938.4. Samples: 73024014. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:28,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:29,376][626795] Updated weights for policy 0, policy_version 157742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:33,688][626795] Updated weights for policy 0, policy_version 157752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:33,976][24592] Fps is (10 sec: 36043.6, 60 sec: 42598.2, 300 sec: 42737.2). Total num frames: 1292320768. Throughput: 0: 10306.0. Samples: 73064922. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:33,977][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:35,406][626795] Updated weights for policy 0, policy_version 157762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:37,175][626795] Updated weights for policy 0, policy_version 157772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:38,931][626795] Updated weights for policy 0, policy_version 157782 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:38,975][24592] Fps is (10 sec: 35226.7, 60 sec: 42598.4, 300 sec: 42737.2). Total num frames: 1292550144. Throughput: 0: 10317.6. Samples: 73135026. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:38,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:40,722][626795] Updated weights for policy 0, policy_version 157792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:42,561][626795] Updated weights for policy 0, policy_version 157802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:43,975][24592] Fps is (10 sec: 45876.7, 60 sec: 42461.8, 300 sec: 42737.5). Total num frames: 1292779520. Throughput: 0: 10674.1. Samples: 73169328. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:43,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:44,280][626795] Updated weights for policy 0, policy_version 157812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:45,928][626795] Updated weights for policy 0, policy_version 157822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:47,718][626795] Updated weights for policy 0, policy_version 157832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:48,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42598.5, 300 sec: 43114.8). Total num frames: 1293008896. Throughput: 0: 10996.3. Samples: 73239624. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:48,976][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:49,560][626795] Updated weights for policy 0, policy_version 157842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:51,202][626795] Updated weights for policy 0, policy_version 157852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:52,952][626795] Updated weights for policy 0, policy_version 157862 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:53,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42598.7, 300 sec: 43181.6). Total num frames: 1293246464. Throughput: 0: 11008.9. Samples: 73309314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:53,978][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:54,820][626795] Updated weights for policy 0, policy_version 157872 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:56,525][626795] Updated weights for policy 0, policy_version 157882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:14:58,385][626795] Updated weights for policy 0, policy_version 157892 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:58,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44509.9, 300 sec: 43181.6). Total num frames: 1293475840. Throughput: 0: 10995.8. Samples: 73343964. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:14:58,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:00,160][626795] Updated weights for policy 0, policy_version 157902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:01,791][626795] Updated weights for policy 0, policy_version 157912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:03,539][626795] Updated weights for policy 0, policy_version 157922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:03,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44510.0, 300 sec: 43153.8). Total num frames: 1293705216. Throughput: 0: 10985.5. Samples: 73413408. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:03,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000157923_1293705216.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000156653_1283301376.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:07,951][626795] Updated weights for policy 0, policy_version 157932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:08,975][24592] Fps is (10 sec: 35225.4, 60 sec: 42734.9, 300 sec: 42820.5). Total num frames: 1293828096. Throughput: 0: 10324.3. Samples: 73453992. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:08,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:09,654][626795] Updated weights for policy 0, policy_version 157942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:11,472][626795] Updated weights for policy 0, policy_version 157952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:13,163][626795] Updated weights for policy 0, policy_version 157962 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:13,975][24592] Fps is (10 sec: 35225.8, 60 sec: 42598.6, 300 sec: 42792.8). Total num frames: 1294057472. Throughput: 0: 10331.4. Samples: 73488924. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:13,977][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:14,969][626795] Updated weights for policy 0, policy_version 157972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:16,778][626795] Updated weights for policy 0, policy_version 157982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:18,598][626795] Updated weights for policy 0, policy_version 157992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:18,976][24592] Fps is (10 sec: 45874.1, 60 sec: 42598.3, 300 sec: 42765.0). Total num frames: 1294286848. Throughput: 0: 10957.3. Samples: 73558002. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:18,976][24592] Avg episode reward: [(0, '4.303')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:20,313][626795] Updated weights for policy 0, policy_version 158002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:22,021][626795] Updated weights for policy 0, policy_version 158012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:23,729][626795] Updated weights for policy 0, policy_version 158022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:23,978][24592] Fps is (10 sec: 46685.8, 60 sec: 42733.7, 300 sec: 43170.6). Total num frames: 1294524416. Throughput: 0: 10963.7. Samples: 73628412. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:23,979][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:25,554][626795] Updated weights for policy 0, policy_version 158032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:27,268][626795] Updated weights for policy 0, policy_version 158042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:28,975][24592] Fps is (10 sec: 46695.8, 60 sec: 42598.6, 300 sec: 43153.8). Total num frames: 1294753792. Throughput: 0: 10979.3. Samples: 73663398. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:28,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:29,008][626795] Updated weights for policy 0, policy_version 158052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:30,739][626795] Updated weights for policy 0, policy_version 158062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:32,553][626795] Updated weights for policy 0, policy_version 158072 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:33,976][24592] Fps is (10 sec: 46701.6, 60 sec: 44510.0, 300 sec: 43209.3). Total num frames: 1294991360. Throughput: 0: 10977.7. Samples: 73733622. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:33,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:34,290][626795] Updated weights for policy 0, policy_version 158082 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:36,064][626795] Updated weights for policy 0, policy_version 158092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:37,861][626795] Updated weights for policy 0, policy_version 158102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:38,976][24592] Fps is (10 sec: 46692.5, 60 sec: 44509.6, 300 sec: 43181.5). Total num frames: 1295220736. Throughput: 0: 10963.6. Samples: 73802682. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:38,977][24592] Avg episode reward: [(0, '4.245')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:42,135][626795] Updated weights for policy 0, policy_version 158112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:43,793][626795] Updated weights for policy 0, policy_version 158122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:43,975][24592] Fps is (10 sec: 34407.3, 60 sec: 42598.4, 300 sec: 42820.6). Total num frames: 1295335424. Throughput: 0: 10404.8. Samples: 73812180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:43,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:45,571][626795] Updated weights for policy 0, policy_version 158132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:47,382][626795] Updated weights for policy 0, policy_version 158142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:48,975][24592] Fps is (10 sec: 35227.0, 60 sec: 42735.0, 300 sec: 42820.6). Total num frames: 1295572992. Throughput: 0: 10340.4. Samples: 73878726. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:48,977][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:49,206][626795] Updated weights for policy 0, policy_version 158152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:50,850][626795] Updated weights for policy 0, policy_version 158162 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:52,714][626795] Updated weights for policy 0, policy_version 158172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:53,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42598.4, 300 sec: 42820.5). Total num frames: 1295802368. Throughput: 0: 10974.7. Samples: 73947852. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:53,978][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:54,522][626795] Updated weights for policy 0, policy_version 158182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:56,223][626795] Updated weights for policy 0, policy_version 158192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:58,038][626795] Updated weights for policy 0, policy_version 158202 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:58,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42598.4, 300 sec: 43210.3). Total num frames: 1296031744. Throughput: 0: 10969.6. Samples: 73982556. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:15:58,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:15:59,714][626795] Updated weights for policy 0, policy_version 158212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:01,491][626795] Updated weights for policy 0, policy_version 158222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:03,319][626795] Updated weights for policy 0, policy_version 158232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:03,977][24592] Fps is (10 sec: 45869.9, 60 sec: 42597.5, 300 sec: 43209.2). Total num frames: 1296261120. Throughput: 0: 10978.8. Samples: 74052060. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:03,978][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:05,004][626795] Updated weights for policy 0, policy_version 158242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:06,854][626795] Updated weights for policy 0, policy_version 158252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:08,558][626795] Updated weights for policy 0, policy_version 158262 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:08,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44509.9, 300 sec: 43237.1). Total num frames: 1296498688. Throughput: 0: 10972.6. Samples: 74122158. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:08,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:10,339][626795] Updated weights for policy 0, policy_version 158272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:12,037][626795] Updated weights for policy 0, policy_version 158282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:13,807][626795] Updated weights for policy 0, policy_version 158292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:13,976][24592] Fps is (10 sec: 47517.4, 60 sec: 44646.1, 300 sec: 43264.9). Total num frames: 1296736256. Throughput: 0: 10971.6. Samples: 74157126. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:13,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:18,096][626795] Updated weights for policy 0, policy_version 158302 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:18,975][24592] Fps is (10 sec: 35225.4, 60 sec: 42735.1, 300 sec: 42848.3). Total num frames: 1296850944. Throughput: 0: 10327.9. Samples: 74198376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:18,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:19,817][626795] Updated weights for policy 0, policy_version 158312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:21,584][626795] Updated weights for policy 0, policy_version 158322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:23,368][626795] Updated weights for policy 0, policy_version 158332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:23,976][24592] Fps is (10 sec: 34405.8, 60 sec: 42599.3, 300 sec: 42848.2). Total num frames: 1297080320. Throughput: 0: 10340.4. Samples: 74268000. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:23,978][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:25,066][626795] Updated weights for policy 0, policy_version 158342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:26,928][626795] Updated weights for policy 0, policy_version 158352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:28,729][626795] Updated weights for policy 0, policy_version 158362 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:28,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42734.9, 300 sec: 42848.3). Total num frames: 1297317888. Throughput: 0: 10900.0. Samples: 74302680. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:28,978][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:30,372][626795] Updated weights for policy 0, policy_version 158372 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:32,075][626795] Updated weights for policy 0, policy_version 158382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:33,878][626795] Updated weights for policy 0, policy_version 158392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:33,975][24592] Fps is (10 sec: 46697.1, 60 sec: 42598.6, 300 sec: 43237.1). Total num frames: 1297547264. Throughput: 0: 10988.5. Samples: 74373210. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:33,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:35,610][626795] Updated weights for policy 0, policy_version 158402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:37,380][626795] Updated weights for policy 0, policy_version 158412 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:38,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42598.7, 300 sec: 43237.1). Total num frames: 1297776640. Throughput: 0: 10999.6. Samples: 74442834. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:38,977][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:39,134][626795] Updated weights for policy 0, policy_version 158422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:40,964][626795] Updated weights for policy 0, policy_version 158432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:42,694][626795] Updated weights for policy 0, policy_version 158442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:43,975][24592] Fps is (10 sec: 46693.9, 60 sec: 44646.3, 300 sec: 43264.9). Total num frames: 1298014208. Throughput: 0: 11008.9. Samples: 74477958. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:43,977][24592] Avg episode reward: [(0, '4.346')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:44,453][626795] Updated weights for policy 0, policy_version 158452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:46,187][626795] Updated weights for policy 0, policy_version 158462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:47,964][626795] Updated weights for policy 0, policy_version 158472 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:51,321][24592] Fps is (10 sec: 37821.5, 60 sec: 42835.0, 300 sec: 42923.5). Total num frames: 1298243584. Throughput: 0: 10472.2. Samples: 74547864. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:51,323][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:52,322][626795] Updated weights for policy 0, policy_version 158482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:53,922][626795] Updated weights for policy 0, policy_version 158492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:53,975][24592] Fps is (10 sec: 35225.9, 60 sec: 42735.0, 300 sec: 42876.2). Total num frames: 1298366464. Throughput: 0: 10359.5. Samples: 74588334. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:53,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:55,791][626795] Updated weights for policy 0, policy_version 158502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:57,570][626795] Updated weights for policy 0, policy_version 158512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:58,975][24592] Fps is (10 sec: 46022.6, 60 sec: 42734.9, 300 sec: 42848.3). Total num frames: 1298595840. Throughput: 0: 10347.4. Samples: 74622756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:16:58,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:16:59,278][626795] Updated weights for policy 0, policy_version 158522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:01,048][626795] Updated weights for policy 0, policy_version 158532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:02,791][626795] Updated weights for policy 0, policy_version 158542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:03,975][24592] Fps is (10 sec: 45874.5, 60 sec: 42735.7, 300 sec: 42876.1). Total num frames: 1298825216. Throughput: 0: 10986.2. Samples: 74692758. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:03,978][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000158548_1298825216.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000157287_1288495104.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:04,553][626795] Updated weights for policy 0, policy_version 158552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:06,367][626795] Updated weights for policy 0, policy_version 158562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:08,131][626795] Updated weights for policy 0, policy_version 158572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:08,975][24592] Fps is (10 sec: 45874.9, 60 sec: 42598.4, 300 sec: 43237.1). Total num frames: 1299054592. Throughput: 0: 10985.2. Samples: 74762328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:08,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:09,772][626795] Updated weights for policy 0, policy_version 158582 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:11,554][626795] Updated weights for policy 0, policy_version 158592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:13,349][626795] Updated weights for policy 0, policy_version 158602 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:13,975][24592] Fps is (10 sec: 46694.9, 60 sec: 42598.7, 300 sec: 43292.6). Total num frames: 1299292160. Throughput: 0: 10994.5. Samples: 74797434. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:13,977][24592] Avg episode reward: [(0, '4.330')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:15,122][626795] Updated weights for policy 0, policy_version 158612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:16,952][626795] Updated weights for policy 0, policy_version 158622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:18,710][626795] Updated weights for policy 0, policy_version 158632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:18,976][24592] Fps is (10 sec: 46692.8, 60 sec: 44509.6, 300 sec: 43264.8). Total num frames: 1299521536. Throughput: 0: 10966.0. Samples: 74866686. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:18,977][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:20,411][626795] Updated weights for policy 0, policy_version 158642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:22,257][626795] Updated weights for policy 0, policy_version 158652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:22,879][626772] Signal inference workers to stop experience collection... (1100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:22,884][626772] Signal inference workers to resume experience collection... (1100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:22,896][626795] InferenceWorker_p0-w0: stopping experience collection (1100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:22,897][626795] InferenceWorker_p0-w0: resuming experience collection (1100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:26,049][24592] Fps is (10 sec: 35961.5, 60 sec: 42627.6, 300 sec: 42880.2). Total num frames: 1299726336. Throughput: 0: 9755.3. Samples: 74902050. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:26,050][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:26,590][626795] Updated weights for policy 0, policy_version 158662 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:28,254][626795] Updated weights for policy 0, policy_version 158672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:28,976][24592] Fps is (10 sec: 34406.7, 60 sec: 42461.7, 300 sec: 42848.3). Total num frames: 1299865600. Throughput: 0: 10315.7. Samples: 74942166. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:28,978][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:29,995][626795] Updated weights for policy 0, policy_version 158682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:31,801][626795] Updated weights for policy 0, policy_version 158692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:33,617][626795] Updated weights for policy 0, policy_version 158702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:33,975][24592] Fps is (10 sec: 47540.0, 60 sec: 42598.4, 300 sec: 42903.9). Total num frames: 1300103168. Throughput: 0: 10866.1. Samples: 75011346. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:33,977][24592] Avg episode reward: [(0, '4.312')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:35,260][626795] Updated weights for policy 0, policy_version 158712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:37,064][626795] Updated weights for policy 0, policy_version 158722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:38,852][626795] Updated weights for policy 0, policy_version 158732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:38,975][24592] Fps is (10 sec: 47515.1, 60 sec: 42735.0, 300 sec: 42904.0). Total num frames: 1300340736. Throughput: 0: 10971.2. Samples: 75082038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:38,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:40,573][626795] Updated weights for policy 0, policy_version 158742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:42,282][626795] Updated weights for policy 0, policy_version 158752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42598.5, 300 sec: 43292.6). Total num frames: 1300570112. Throughput: 0: 10969.2. Samples: 75116370. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:43,978][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:44,132][626795] Updated weights for policy 0, policy_version 158762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:45,857][626795] Updated weights for policy 0, policy_version 158772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:47,623][626795] Updated weights for policy 0, policy_version 158782 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:48,975][24592] Fps is (10 sec: 45874.9, 60 sec: 44331.7, 300 sec: 43264.9). Total num frames: 1300799488. Throughput: 0: 10973.6. Samples: 75186570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:48,977][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:49,331][626795] Updated weights for policy 0, policy_version 158792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:51,067][626795] Updated weights for policy 0, policy_version 158802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:52,954][626795] Updated weights for policy 0, policy_version 158812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44509.8, 300 sec: 43320.4). Total num frames: 1301037056. Throughput: 0: 10979.5. Samples: 75256404. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:17:53,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:54,645][626795] Updated weights for policy 0, policy_version 158822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:56,390][626795] Updated weights for policy 0, policy_version 158832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:17:58,192][626795] Updated weights for policy 0, policy_version 158842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:00,774][24592] Fps is (10 sec: 36798.8, 60 sec: 42684.1, 300 sec: 42919.9). Total num frames: 1301233664. Throughput: 0: 10552.3. Samples: 75291270. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:00,776][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:02,472][626795] Updated weights for policy 0, policy_version 158852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:03,976][24592] Fps is (10 sec: 34405.3, 60 sec: 42598.2, 300 sec: 42903.9). Total num frames: 1301381120. Throughput: 0: 10343.6. Samples: 75332148. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:03,976][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:04,193][626795] Updated weights for policy 0, policy_version 158862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:06,061][626795] Updated weights for policy 0, policy_version 158872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:07,667][626795] Updated weights for policy 0, policy_version 158882 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:08,976][24592] Fps is (10 sec: 45944.0, 60 sec: 42597.9, 300 sec: 42903.8). Total num frames: 1301610496. Throughput: 0: 11639.0. Samples: 75401682. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:08,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:09,454][626795] Updated weights for policy 0, policy_version 158892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:11,312][626795] Updated weights for policy 0, policy_version 158902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:12,948][626795] Updated weights for policy 0, policy_version 158912 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:13,975][24592] Fps is (10 sec: 46696.1, 60 sec: 42598.4, 300 sec: 42931.7). Total num frames: 1301848064. Throughput: 0: 10986.5. Samples: 75436554. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:13,976][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:14,778][626795] Updated weights for policy 0, policy_version 158922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:16,483][626795] Updated weights for policy 0, policy_version 158932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:18,244][626795] Updated weights for policy 0, policy_version 158942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:18,976][24592] Fps is (10 sec: 46695.4, 60 sec: 42598.4, 300 sec: 43320.4). Total num frames: 1302077440. Throughput: 0: 11003.5. Samples: 75506508. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:18,978][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:19,973][626795] Updated weights for policy 0, policy_version 158952 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:21,787][626795] Updated weights for policy 0, policy_version 158962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:23,472][626795] Updated weights for policy 0, policy_version 158972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:23,976][24592] Fps is (10 sec: 46692.4, 60 sec: 44688.5, 300 sec: 43320.3). Total num frames: 1302315008. Throughput: 0: 11001.8. Samples: 75577122. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:23,978][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:25,225][626795] Updated weights for policy 0, policy_version 158982 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:26,850][626795] Updated weights for policy 0, policy_version 158992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:28,688][626795] Updated weights for policy 0, policy_version 159002 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:28,975][24592] Fps is (10 sec: 47515.9, 60 sec: 44783.2, 300 sec: 43348.2). Total num frames: 1302552576. Throughput: 0: 11028.5. Samples: 75612654. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:28,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:30,574][626795] Updated weights for policy 0, policy_version 159012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:32,309][626795] Updated weights for policy 0, policy_version 159022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:35,609][24592] Fps is (10 sec: 36618.7, 60 sec: 42798.6, 300 sec: 42971.4). Total num frames: 1302740992. Throughput: 0: 10619.2. Samples: 75681780. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:35,610][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:36,624][626795] Updated weights for policy 0, policy_version 159032 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:38,453][626795] Updated weights for policy 0, policy_version 159042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:38,975][24592] Fps is (10 sec: 34406.2, 60 sec: 42598.4, 300 sec: 42931.6). Total num frames: 1302896640. Throughput: 0: 10329.2. Samples: 75721218. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:38,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:40,227][626795] Updated weights for policy 0, policy_version 159052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:42,004][626795] Updated weights for policy 0, policy_version 159062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:43,823][626795] Updated weights for policy 0, policy_version 159072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:43,975][24592] Fps is (10 sec: 45039.9, 60 sec: 42461.9, 300 sec: 42931.7). Total num frames: 1303117824. Throughput: 0: 10740.4. Samples: 75755268. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:43,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:45,568][626795] Updated weights for policy 0, policy_version 159082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:47,191][626795] Updated weights for policy 0, policy_version 159092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:48,914][626795] Updated weights for policy 0, policy_version 159102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:48,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42735.0, 300 sec: 42959.5). Total num frames: 1303363584. Throughput: 0: 10973.0. Samples: 75825930. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:48,976][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:50,680][626795] Updated weights for policy 0, policy_version 159112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:52,366][626795] Updated weights for policy 0, policy_version 159122 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:53,975][24592] Fps is (10 sec: 48332.1, 60 sec: 42734.9, 300 sec: 43375.9). Total num frames: 1303601152. Throughput: 0: 11010.8. Samples: 75897162. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:53,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:54,271][626795] Updated weights for policy 0, policy_version 159132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:55,901][626795] Updated weights for policy 0, policy_version 159142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:57,753][626795] Updated weights for policy 0, policy_version 159152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:58,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44618.7, 300 sec: 43376.0). Total num frames: 1303830528. Throughput: 0: 11000.0. Samples: 75931554. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:18:58,976][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:18:59,441][626795] Updated weights for policy 0, policy_version 159162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:01,262][626795] Updated weights for policy 0, policy_version 159172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:02,972][626795] Updated weights for policy 0, policy_version 159182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:03,976][24592] Fps is (10 sec: 45875.1, 60 sec: 44646.5, 300 sec: 43375.9). Total num frames: 1304059904. Throughput: 0: 10997.4. Samples: 76001388. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:03,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000159187_1304059904.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:04,046][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000157923_1293705216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:04,824][626795] Updated weights for policy 0, policy_version 159192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:06,556][626795] Updated weights for policy 0, policy_version 159202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:10,418][24592] Fps is (10 sec: 35079.2, 60 sec: 42665.2, 300 sec: 42943.8). Total num frames: 1304231936. Throughput: 0: 9860.6. Samples: 76035072. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:10,419][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:10,918][626795] Updated weights for policy 0, policy_version 159212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:12,716][626795] Updated weights for policy 0, policy_version 159222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:13,975][24592] Fps is (10 sec: 34406.9, 60 sec: 42598.4, 300 sec: 42959.4). Total num frames: 1304403968. Throughput: 0: 10287.1. Samples: 76075572. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:13,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:14,476][626795] Updated weights for policy 0, policy_version 159232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:16,259][626795] Updated weights for policy 0, policy_version 159242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:18,042][626795] Updated weights for policy 0, policy_version 159252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:18,975][24592] Fps is (10 sec: 45951.8, 60 sec: 42462.2, 300 sec: 42931.6). Total num frames: 1304625152. Throughput: 0: 10666.2. Samples: 76144338. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:18,976][24592] Avg episode reward: [(0, '4.337')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:19,776][626795] Updated weights for policy 0, policy_version 159262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:21,561][626795] Updated weights for policy 0, policy_version 159272 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:23,235][626795] Updated weights for policy 0, policy_version 159282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:23,975][24592] Fps is (10 sec: 46693.9, 60 sec: 42598.6, 300 sec: 42959.4). Total num frames: 1304870912. Throughput: 0: 10958.1. Samples: 76214334. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:23,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:24,991][626795] Updated weights for policy 0, policy_version 159292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:26,631][626795] Updated weights for policy 0, policy_version 159302 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:28,424][626795] Updated weights for policy 0, policy_version 159312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:28,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.4, 300 sec: 43348.2). Total num frames: 1305108480. Throughput: 0: 11006.4. Samples: 76250556. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:28,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:30,232][626795] Updated weights for policy 0, policy_version 159322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:31,982][626795] Updated weights for policy 0, policy_version 159332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:33,681][626795] Updated weights for policy 0, policy_version 159342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:33,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44492.2, 300 sec: 43348.2). Total num frames: 1305337856. Throughput: 0: 10997.8. Samples: 76320834. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:33,977][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:35,467][626795] Updated weights for policy 0, policy_version 159352 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:37,282][626795] Updated weights for policy 0, policy_version 159362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:38,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44509.9, 300 sec: 43348.2). Total num frames: 1305567232. Throughput: 0: 10951.4. Samples: 76389972. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:38,976][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:38,985][626795] Updated weights for policy 0, policy_version 159372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:40,816][626795] Updated weights for policy 0, policy_version 159382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:42,575][626795] Updated weights for policy 0, policy_version 159392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:45,343][24592] Fps is (10 sec: 35310.6, 60 sec: 42716.7, 300 sec: 42954.6). Total num frames: 1305739264. Throughput: 0: 10629.4. Samples: 76424418. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:45,345][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:47,013][626795] Updated weights for policy 0, policy_version 159402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:48,874][626795] Updated weights for policy 0, policy_version 159412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:48,975][24592] Fps is (10 sec: 33587.1, 60 sec: 42325.3, 300 sec: 42903.9). Total num frames: 1305903104. Throughput: 0: 10252.7. Samples: 76462758. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:48,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:50,685][626795] Updated weights for policy 0, policy_version 159422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:52,403][626795] Updated weights for policy 0, policy_version 159432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:53,975][24592] Fps is (10 sec: 45553.1, 60 sec: 42188.8, 300 sec: 42903.9). Total num frames: 1306132480. Throughput: 0: 11391.9. Samples: 76531272. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:53,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:54,250][626795] Updated weights for policy 0, policy_version 159442 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:55,982][626795] Updated weights for policy 0, policy_version 159452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:57,667][626795] Updated weights for policy 0, policy_version 159462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:58,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42325.3, 300 sec: 42931.6). Total num frames: 1306370048. Throughput: 0: 10901.6. Samples: 76566144. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:19:58,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:19:59,414][626795] Updated weights for policy 0, policy_version 159472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:01,162][626795] Updated weights for policy 0, policy_version 159482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:02,768][626795] Updated weights for policy 0, policy_version 159492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:03,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42462.0, 300 sec: 43320.4). Total num frames: 1306607616. Throughput: 0: 10979.1. Samples: 76638396. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:03,976][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:04,523][626795] Updated weights for policy 0, policy_version 159502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:06,167][626795] Updated weights for policy 0, policy_version 159512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:07,989][626795] Updated weights for policy 0, policy_version 159522 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:08,975][24592] Fps is (10 sec: 48332.4, 60 sec: 44767.2, 300 sec: 43375.9). Total num frames: 1306853376. Throughput: 0: 11024.3. Samples: 76710426. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:08,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:09,625][626795] Updated weights for policy 0, policy_version 159532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:11,325][626795] Updated weights for policy 0, policy_version 159542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:13,114][626795] Updated weights for policy 0, policy_version 159552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:13,976][24592] Fps is (10 sec: 48330.6, 60 sec: 44782.6, 300 sec: 43403.7). Total num frames: 1307090944. Throughput: 0: 11002.4. Samples: 76745670. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:13,979][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:15,031][626795] Updated weights for policy 0, policy_version 159562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:16,630][626795] Updated weights for policy 0, policy_version 159572 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:20,328][24592] Fps is (10 sec: 34636.5, 60 sec: 42727.4, 300 sec: 42929.4). Total num frames: 1307246592. Throughput: 0: 9917.3. Samples: 76780524. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:20,329][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:21,189][626795] Updated weights for policy 0, policy_version 159582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:22,923][626795] Updated weights for policy 0, policy_version 159592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:23,975][24592] Fps is (10 sec: 32769.3, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1307418624. Throughput: 0: 10276.9. Samples: 76852434. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:23,978][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:24,814][626795] Updated weights for policy 0, policy_version 159602 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:26,548][626795] Updated weights for policy 0, policy_version 159612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:28,383][626795] Updated weights for policy 0, policy_version 159622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:28,976][24592] Fps is (10 sec: 46415.5, 60 sec: 42324.8, 300 sec: 42903.8). Total num frames: 1307648000. Throughput: 0: 10600.5. Samples: 76886946. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:28,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:30,151][626795] Updated weights for policy 0, policy_version 159632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:31,803][626795] Updated weights for policy 0, policy_version 159642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:33,508][626795] Updated weights for policy 0, policy_version 159652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:33,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42462.0, 300 sec: 42931.7). Total num frames: 1307885568. Throughput: 0: 10986.5. Samples: 76957152. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:33,977][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:35,288][626795] Updated weights for policy 0, policy_version 159662 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:36,963][626795] Updated weights for policy 0, policy_version 159672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:38,699][626795] Updated weights for policy 0, policy_version 159682 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:38,976][24592] Fps is (10 sec: 47515.8, 60 sec: 42598.2, 300 sec: 43348.1). Total num frames: 1308123136. Throughput: 0: 11060.9. Samples: 77029014. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:38,977][24592] Avg episode reward: [(0, '4.342')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:40,391][626795] Updated weights for policy 0, policy_version 159692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:42,138][626795] Updated weights for policy 0, policy_version 159702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:43,762][626795] Updated weights for policy 0, policy_version 159712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:43,975][24592] Fps is (10 sec: 47513.0, 60 sec: 44710.0, 300 sec: 43348.2). Total num frames: 1308360704. Throughput: 0: 11080.1. Samples: 77064750. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:43,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:45,533][626795] Updated weights for policy 0, policy_version 159722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:47,333][626795] Updated weights for policy 0, policy_version 159732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:48,975][24592] Fps is (10 sec: 47515.3, 60 sec: 44919.5, 300 sec: 43376.0). Total num frames: 1308598272. Throughput: 0: 11042.9. Samples: 77135328. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:48,977][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:49,142][626795] Updated weights for policy 0, policy_version 159742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:50,892][626795] Updated weights for policy 0, policy_version 159752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:55,363][24592] Fps is (10 sec: 35251.1, 60 sec: 42836.9, 300 sec: 42951.8). Total num frames: 1308762112. Throughput: 0: 9896.7. Samples: 77169504. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:55,363][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:55,537][626795] Updated weights for policy 0, policy_version 159762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:57,395][626795] Updated weights for policy 0, policy_version 159772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:58,975][24592] Fps is (10 sec: 31948.4, 60 sec: 42461.7, 300 sec: 42904.0). Total num frames: 1308917760. Throughput: 0: 10231.8. Samples: 77206098. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:20:58,978][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:20:59,208][626795] Updated weights for policy 0, policy_version 159782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:01,015][626795] Updated weights for policy 0, policy_version 159792 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:02,894][626795] Updated weights for policy 0, policy_version 159802 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:03,976][24592] Fps is (10 sec: 43751.8, 60 sec: 42188.7, 300 sec: 42848.3). Total num frames: 1309138944. Throughput: 0: 11278.9. Samples: 77272818. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:03,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:04,006][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000159808_1309147136.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:04,113][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000158548_1298825216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:04,720][626795] Updated weights for policy 0, policy_version 159812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:06,499][626795] Updated weights for policy 0, policy_version 159822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:08,173][626795] Updated weights for policy 0, policy_version 159832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:08,975][24592] Fps is (10 sec: 45875.8, 60 sec: 42052.3, 300 sec: 42848.4). Total num frames: 1309376512. Throughput: 0: 10876.7. Samples: 77341884. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:08,976][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:09,992][626795] Updated weights for policy 0, policy_version 159842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:11,653][626795] Updated weights for policy 0, policy_version 159852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:13,402][626795] Updated weights for policy 0, policy_version 159862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:13,975][24592] Fps is (10 sec: 47514.4, 60 sec: 42052.6, 300 sec: 43264.9). Total num frames: 1309614080. Throughput: 0: 10900.9. Samples: 77377476. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:13,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:15,063][626795] Updated weights for policy 0, policy_version 159872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:16,769][626795] Updated weights for policy 0, policy_version 159882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:18,462][626795] Updated weights for policy 0, policy_version 159892 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:18,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44419.0, 300 sec: 43292.7). Total num frames: 1309851648. Throughput: 0: 10945.9. Samples: 77449716. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:18,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:20,286][626795] Updated weights for policy 0, policy_version 159902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:21,980][626795] Updated weights for policy 0, policy_version 159912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:23,715][626795] Updated weights for policy 0, policy_version 159922 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:23,976][24592] Fps is (10 sec: 47509.6, 60 sec: 44509.3, 300 sec: 43292.5). Total num frames: 1310089216. Throughput: 0: 10906.0. Samples: 77519790. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:23,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:25,474][626795] Updated weights for policy 0, policy_version 159932 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:27,408][626795] Updated weights for policy 0, policy_version 159942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:30,340][24592] Fps is (10 sec: 35320.5, 60 sec: 42452.6, 300 sec: 42872.1). Total num frames: 1310253056. Throughput: 0: 10568.7. Samples: 77554764. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:30,341][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:31,799][626795] Updated weights for policy 0, policy_version 159952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:33,734][626795] Updated weights for policy 0, policy_version 159962 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:33,976][24592] Fps is (10 sec: 32769.1, 60 sec: 42188.4, 300 sec: 42848.3). Total num frames: 1310416896. Throughput: 0: 10141.0. Samples: 77591676. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:33,978][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:35,480][626795] Updated weights for policy 0, policy_version 159972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:37,367][626795] Updated weights for policy 0, policy_version 159982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:38,976][24592] Fps is (10 sec: 44584.8, 60 sec: 41915.6, 300 sec: 42792.7). Total num frames: 1310638080. Throughput: 0: 11216.0. Samples: 77658672. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:38,977][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:39,184][626795] Updated weights for policy 0, policy_version 159992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:40,851][626795] Updated weights for policy 0, policy_version 160002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:42,504][626795] Updated weights for policy 0, policy_version 160012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:43,975][24592] Fps is (10 sec: 46696.6, 60 sec: 42052.3, 300 sec: 43191.8). Total num frames: 1310883840. Throughput: 0: 10839.6. Samples: 77693880. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:43,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:44,348][626795] Updated weights for policy 0, policy_version 160022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:46,047][626795] Updated weights for policy 0, policy_version 160032 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:47,812][626795] Updated weights for policy 0, policy_version 160042 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:48,975][24592] Fps is (10 sec: 48335.1, 60 sec: 42052.2, 300 sec: 43237.1). Total num frames: 1311121408. Throughput: 0: 10947.0. Samples: 77765430. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:48,976][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:49,423][626795] Updated weights for policy 0, policy_version 160052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:51,147][626795] Updated weights for policy 0, policy_version 160062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:52,867][626795] Updated weights for policy 0, policy_version 160072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44305.4, 300 sec: 43264.9). Total num frames: 1311358976. Throughput: 0: 11022.1. Samples: 77837880. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:53,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:54,597][626795] Updated weights for policy 0, policy_version 160082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:56,313][626795] Updated weights for policy 0, policy_version 160092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:58,092][626795] Updated weights for policy 0, policy_version 160102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:58,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44646.5, 300 sec: 43292.7). Total num frames: 1311596544. Throughput: 0: 11006.5. Samples: 77872770. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:21:58,976][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:21:59,821][626795] Updated weights for policy 0, policy_version 160112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:01,602][626795] Updated weights for policy 0, policy_version 160122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:05,361][24592] Fps is (10 sec: 35256.5, 60 sec: 42704.7, 300 sec: 42869.2). Total num frames: 1311760384. Throughput: 0: 9874.3. Samples: 77907738. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:05,363][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:06,163][626795] Updated weights for policy 0, policy_version 160132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:07,948][626795] Updated weights for policy 0, policy_version 160142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:08,976][24592] Fps is (10 sec: 32766.6, 60 sec: 42461.5, 300 sec: 42820.5). Total num frames: 1311924224. Throughput: 0: 10206.1. Samples: 77979060. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:08,978][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:09,771][626795] Updated weights for policy 0, policy_version 160152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:11,687][626795] Updated weights for policy 0, policy_version 160162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:13,419][626795] Updated weights for policy 0, policy_version 160172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:13,975][24592] Fps is (10 sec: 44694.3, 60 sec: 42188.8, 300 sec: 42792.8). Total num frames: 1312145408. Throughput: 0: 10495.6. Samples: 78012744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:13,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:15,178][626795] Updated weights for policy 0, policy_version 160182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:16,888][626795] Updated weights for policy 0, policy_version 160192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:18,588][626795] Updated weights for policy 0, policy_version 160202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:18,975][24592] Fps is (10 sec: 45877.5, 60 sec: 42188.8, 300 sec: 43207.6). Total num frames: 1312382976. Throughput: 0: 10917.2. Samples: 78082944. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:18,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:20,343][626795] Updated weights for policy 0, policy_version 160212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:21,976][626795] Updated weights for policy 0, policy_version 160222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:23,691][626795] Updated weights for policy 0, policy_version 160232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42325.9, 300 sec: 43264.9). Total num frames: 1312628736. Throughput: 0: 11030.1. Samples: 78155022. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:23,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:25,465][626795] Updated weights for policy 0, policy_version 160242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:27,164][626795] Updated weights for policy 0, policy_version 160252 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:28,804][626795] Updated weights for policy 0, policy_version 160262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:28,975][24592] Fps is (10 sec: 48332.7, 60 sec: 44567.9, 300 sec: 43264.9). Total num frames: 1312866304. Throughput: 0: 11045.9. Samples: 78190944. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:28,976][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:30,618][626795] Updated weights for policy 0, policy_version 160272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:32,346][626795] Updated weights for policy 0, policy_version 160282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:33,976][24592] Fps is (10 sec: 47512.7, 60 sec: 44783.2, 300 sec: 43264.8). Total num frames: 1313103872. Throughput: 0: 11030.8. Samples: 78261816. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:33,976][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:34,105][626795] Updated weights for policy 0, policy_version 160292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:35,841][626795] Updated weights for policy 0, policy_version 160302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:40,408][24592] Fps is (10 sec: 35110.6, 60 sec: 42805.5, 300 sec: 42834.7). Total num frames: 1313267712. Throughput: 0: 9887.7. Samples: 78296994. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:40,409][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:40,423][626795] Updated weights for policy 0, policy_version 160312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:42,265][626795] Updated weights for policy 0, policy_version 160322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:43,975][24592] Fps is (10 sec: 31949.3, 60 sec: 42325.3, 300 sec: 42792.8). Total num frames: 1313423360. Throughput: 0: 10226.9. Samples: 78332982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:43,976][24592] Avg episode reward: [(0, '4.246')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:44,277][626795] Updated weights for policy 0, policy_version 160332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:46,105][626795] Updated weights for policy 0, policy_version 160342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:47,963][626795] Updated weights for policy 0, policy_version 160352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:48,976][24592] Fps is (10 sec: 44939.5, 60 sec: 42188.6, 300 sec: 42765.0). Total num frames: 1313652736. Throughput: 0: 11249.1. Samples: 78398364. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:48,978][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:49,716][626795] Updated weights for policy 0, policy_version 160362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:51,389][626795] Updated weights for policy 0, policy_version 160372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:53,143][626795] Updated weights for policy 0, policy_version 160382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:53,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42188.8, 300 sec: 43167.1). Total num frames: 1313890304. Throughput: 0: 10894.5. Samples: 78469308. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:53,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:54,893][626795] Updated weights for policy 0, policy_version 160392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:56,504][626795] Updated weights for policy 0, policy_version 160402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:22:58,245][626795] Updated weights for policy 0, policy_version 160412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:58,975][24592] Fps is (10 sec: 46695.5, 60 sec: 42052.3, 300 sec: 43181.6). Total num frames: 1314119680. Throughput: 0: 10929.7. Samples: 78504582. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:22:58,976][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:00,024][626795] Updated weights for policy 0, policy_version 160422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:01,834][626795] Updated weights for policy 0, policy_version 160432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:03,494][626795] Updated weights for policy 0, policy_version 160442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:03,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44443.8, 300 sec: 43237.2). Total num frames: 1314365440. Throughput: 0: 10954.9. Samples: 78575916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:03,976][24592] Avg episode reward: [(0, '4.257')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000160445_1314365440.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:04,047][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000159187_1304059904.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:05,252][626795] Updated weights for policy 0, policy_version 160452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:06,993][626795] Updated weights for policy 0, policy_version 160462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:08,741][626795] Updated weights for policy 0, policy_version 160472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:08,976][24592] Fps is (10 sec: 47511.3, 60 sec: 44509.8, 300 sec: 43209.3). Total num frames: 1314594816. Throughput: 0: 10926.4. Samples: 78646716. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:08,977][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:10,433][626795] Updated weights for policy 0, policy_version 160482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:12,192][626795] Updated weights for policy 0, policy_version 160492 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:15,484][24592] Fps is (10 sec: 34878.2, 60 sec: 42618.9, 300 sec: 42796.1). Total num frames: 1314766848. Throughput: 0: 10561.2. Samples: 78682134. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:15,485][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:16,893][626795] Updated weights for policy 0, policy_version 160502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:18,695][626795] Updated weights for policy 0, policy_version 160512 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:18,975][24592] Fps is (10 sec: 32769.4, 60 sec: 42325.2, 300 sec: 42737.3). Total num frames: 1314922496. Throughput: 0: 10127.9. Samples: 78717570. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:18,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:20,621][626795] Updated weights for policy 0, policy_version 160522 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:22,329][626795] Updated weights for policy 0, policy_version 160532 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:23,975][24592] Fps is (10 sec: 44378.9, 60 sec: 41915.7, 300 sec: 42681.7). Total num frames: 1315143680. Throughput: 0: 11188.5. Samples: 78784446. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:23,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:24,174][626795] Updated weights for policy 0, policy_version 160542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:25,954][626795] Updated weights for policy 0, policy_version 160552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:27,654][626795] Updated weights for policy 0, policy_version 160562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:28,976][24592] Fps is (10 sec: 45875.4, 60 sec: 41915.7, 300 sec: 43086.9). Total num frames: 1315381248. Throughput: 0: 10816.3. Samples: 78819714. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:28,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:29,430][626795] Updated weights for policy 0, policy_version 160572 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:31,109][626795] Updated weights for policy 0, policy_version 160582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:32,899][626795] Updated weights for policy 0, policy_version 160592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:33,975][24592] Fps is (10 sec: 47513.9, 60 sec: 41915.9, 300 sec: 43126.0). Total num frames: 1315618816. Throughput: 0: 10941.8. Samples: 78890742. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:33,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:34,538][626795] Updated weights for policy 0, policy_version 160602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:36,320][626795] Updated weights for policy 0, policy_version 160612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:37,870][626795] Updated weights for policy 0, policy_version 160622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:38,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44199.9, 300 sec: 43181.6). Total num frames: 1315856384. Throughput: 0: 10960.5. Samples: 78962532. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:38,976][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:39,718][626795] Updated weights for policy 0, policy_version 160632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:41,506][626795] Updated weights for policy 0, policy_version 160642 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:43,142][626795] Updated weights for policy 0, policy_version 160652 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:43,976][24592] Fps is (10 sec: 47511.5, 60 sec: 44509.6, 300 sec: 43153.7). Total num frames: 1316093952. Throughput: 0: 10957.8. Samples: 78997686. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:43,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:44,940][626795] Updated weights for policy 0, policy_version 160662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:46,639][626795] Updated weights for policy 0, policy_version 160672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:50,391][24592] Fps is (10 sec: 35879.5, 60 sec: 42550.2, 300 sec: 42726.6). Total num frames: 1316265984. Throughput: 0: 10608.5. Samples: 79068318. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:50,393][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:51,164][626795] Updated weights for policy 0, policy_version 160682 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:52,900][626795] Updated weights for policy 0, policy_version 160692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:53,975][24592] Fps is (10 sec: 33588.4, 60 sec: 42325.2, 300 sec: 42709.5). Total num frames: 1316429824. Throughput: 0: 10189.0. Samples: 79105218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:53,977][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:54,207][626772] Signal inference workers to stop experience collection... (1150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:54,207][626772] Signal inference workers to resume experience collection... (1150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:54,216][626795] InferenceWorker_p0-w0: stopping experience collection (1150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:54,222][626795] InferenceWorker_p0-w0: resuming experience collection (1150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:54,908][626795] Updated weights for policy 0, policy_version 160702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:56,593][626795] Updated weights for policy 0, policy_version 160712 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:23:58,452][626795] Updated weights for policy 0, policy_version 160722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:58,975][24592] Fps is (10 sec: 45807.4, 60 sec: 42325.3, 300 sec: 42709.5). Total num frames: 1316659200. Throughput: 0: 10493.7. Samples: 79138518. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:23:58,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:00,139][626795] Updated weights for policy 0, policy_version 160732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:01,928][626795] Updated weights for policy 0, policy_version 160742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:03,631][626795] Updated weights for policy 0, policy_version 160752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:03,976][24592] Fps is (10 sec: 45872.3, 60 sec: 42051.7, 300 sec: 43114.6). Total num frames: 1316888576. Throughput: 0: 10922.1. Samples: 79209072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:03,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:05,439][626795] Updated weights for policy 0, policy_version 160762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:07,009][626795] Updated weights for policy 0, policy_version 160772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:08,821][626795] Updated weights for policy 0, policy_version 160782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:08,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.6, 300 sec: 43153.8). Total num frames: 1317134336. Throughput: 0: 11024.9. Samples: 79280568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:08,979][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:10,386][626795] Updated weights for policy 0, policy_version 160792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:12,119][626795] Updated weights for policy 0, policy_version 160802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:13,845][626795] Updated weights for policy 0, policy_version 160812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:13,976][24592] Fps is (10 sec: 49154.7, 60 sec: 44677.5, 300 sec: 43237.1). Total num frames: 1317380096. Throughput: 0: 11046.2. Samples: 79316796. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:13,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:15,619][626795] Updated weights for policy 0, policy_version 160822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:17,400][626795] Updated weights for policy 0, policy_version 160832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:18,976][24592] Fps is (10 sec: 47513.1, 60 sec: 44782.8, 300 sec: 43181.5). Total num frames: 1317609472. Throughput: 0: 11048.3. Samples: 79387920. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:18,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:19,027][626795] Updated weights for policy 0, policy_version 160842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:20,808][626795] Updated weights for policy 0, policy_version 160852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:22,613][626795] Updated weights for policy 0, policy_version 160862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:25,289][24592] Fps is (10 sec: 35479.1, 60 sec: 43021.5, 300 sec: 42768.9). Total num frames: 1317781504. Throughput: 0: 9956.4. Samples: 79423656. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:25,291][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:27,060][626795] Updated weights for policy 0, policy_version 160872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:28,883][626795] Updated weights for policy 0, policy_version 160882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:28,975][24592] Fps is (10 sec: 33587.7, 60 sec: 42734.9, 300 sec: 42737.3). Total num frames: 1317945344. Throughput: 0: 10311.2. Samples: 79461684. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:28,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:30,702][626795] Updated weights for policy 0, policy_version 160892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:32,425][626795] Updated weights for policy 0, policy_version 160902 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:33,976][24592] Fps is (10 sec: 45269.4, 60 sec: 42598.2, 300 sec: 42737.2). Total num frames: 1318174720. Throughput: 0: 10600.7. Samples: 79530342. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:33,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:34,202][626795] Updated weights for policy 0, policy_version 160912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:35,862][626795] Updated weights for policy 0, policy_version 160922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:37,657][626795] Updated weights for policy 0, policy_version 160932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:38,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42598.4, 300 sec: 43159.6). Total num frames: 1318412288. Throughput: 0: 11033.1. Samples: 79601706. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:38,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:39,449][626795] Updated weights for policy 0, policy_version 160942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:41,078][626795] Updated weights for policy 0, policy_version 160952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:42,811][626795] Updated weights for policy 0, policy_version 160962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:43,975][24592] Fps is (10 sec: 48333.5, 60 sec: 42735.2, 300 sec: 43237.1). Total num frames: 1318658048. Throughput: 0: 11077.3. Samples: 79636998. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:43,976][24592] Avg episode reward: [(0, '4.389')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:44,415][626795] Updated weights for policy 0, policy_version 160972 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:46,188][626795] Updated weights for policy 0, policy_version 160982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:47,908][626795] Updated weights for policy 0, policy_version 160992 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:48,975][24592] Fps is (10 sec: 48332.5, 60 sec: 44886.5, 300 sec: 43264.9). Total num frames: 1318895616. Throughput: 0: 11130.3. Samples: 79709928. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:48,977][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:49,652][626795] Updated weights for policy 0, policy_version 161002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:51,343][626795] Updated weights for policy 0, policy_version 161012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:53,228][626795] Updated weights for policy 0, policy_version 161022 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:53,975][24592] Fps is (10 sec: 46695.0, 60 sec: 44919.6, 300 sec: 43237.1). Total num frames: 1319124992. Throughput: 0: 11073.4. Samples: 79778868. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:24:53,977][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:54,989][626795] Updated weights for policy 0, policy_version 161032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:24:56,730][626795] Updated weights for policy 0, policy_version 161042 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:00,229][24592] Fps is (10 sec: 34942.7, 60 sec: 42930.6, 300 sec: 42805.3). Total num frames: 1319288832. Throughput: 0: 10735.7. Samples: 79813356. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:00,231][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:01,242][626795] Updated weights for policy 0, policy_version 161052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:03,181][626795] Updated weights for policy 0, policy_version 161062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:03,976][24592] Fps is (10 sec: 31946.4, 60 sec: 42598.4, 300 sec: 42681.6). Total num frames: 1319444480. Throughput: 0: 10282.0. Samples: 79850616. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:03,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000161066_1319452672.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:04,052][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000159808_1309147136.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:04,934][626795] Updated weights for policy 0, policy_version 161072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:06,822][626795] Updated weights for policy 0, policy_version 161082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:08,597][626795] Updated weights for policy 0, policy_version 161092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:08,975][24592] Fps is (10 sec: 45892.2, 60 sec: 42598.5, 300 sec: 42709.5). Total num frames: 1319690240. Throughput: 0: 11335.8. Samples: 79918872. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:08,976][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:10,176][626795] Updated weights for policy 0, policy_version 161102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:11,938][626795] Updated weights for policy 0, policy_version 161112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:13,726][626795] Updated weights for policy 0, policy_version 161122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:13,976][24592] Fps is (10 sec: 47514.8, 60 sec: 42325.1, 300 sec: 43157.2). Total num frames: 1319919616. Throughput: 0: 10945.4. Samples: 79954230. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:13,978][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:15,382][626795] Updated weights for policy 0, policy_version 161132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:17,062][626795] Updated weights for policy 0, policy_version 161142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:18,838][626795] Updated weights for policy 0, policy_version 161152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:18,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42598.6, 300 sec: 43209.3). Total num frames: 1320165376. Throughput: 0: 11016.6. Samples: 80026086. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:18,978][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:20,430][626795] Updated weights for policy 0, policy_version 161162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:22,194][626795] Updated weights for policy 0, policy_version 161172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:23,950][626795] Updated weights for policy 0, policy_version 161182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:23,976][24592] Fps is (10 sec: 48333.1, 60 sec: 44668.7, 300 sec: 43237.2). Total num frames: 1320402944. Throughput: 0: 11035.6. Samples: 80098314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:23,978][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:25,700][626795] Updated weights for policy 0, policy_version 161192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:27,398][626795] Updated weights for policy 0, policy_version 161202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:28,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44783.0, 300 sec: 43209.3). Total num frames: 1320632320. Throughput: 0: 11028.3. Samples: 80133270. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:28,979][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:29,282][626795] Updated weights for policy 0, policy_version 161212 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:30,898][626795] Updated weights for policy 0, policy_version 161222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:35,153][24592] Fps is (10 sec: 35180.8, 60 sec: 42850.0, 300 sec: 42788.7). Total num frames: 1320796160. Throughput: 0: 9919.5. Samples: 80167986. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:35,153][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:35,469][626795] Updated weights for policy 0, policy_version 161232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:37,294][626795] Updated weights for policy 0, policy_version 161242 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:38,976][24592] Fps is (10 sec: 33585.2, 60 sec: 42598.0, 300 sec: 42737.2). Total num frames: 1320968192. Throughput: 0: 10235.1. Samples: 80239452. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:38,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:39,145][626795] Updated weights for policy 0, policy_version 161252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:40,850][626795] Updated weights for policy 0, policy_version 161262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:42,710][626795] Updated weights for policy 0, policy_version 161272 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:43,975][24592] Fps is (10 sec: 45497.8, 60 sec: 42325.4, 300 sec: 42709.5). Total num frames: 1321197568. Throughput: 0: 10518.5. Samples: 80273508. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:43,976][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:44,276][626795] Updated weights for policy 0, policy_version 161282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:46,039][626795] Updated weights for policy 0, policy_version 161292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:47,783][626795] Updated weights for policy 0, policy_version 161302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:48,976][24592] Fps is (10 sec: 47515.0, 60 sec: 42461.7, 300 sec: 43190.2). Total num frames: 1321443328. Throughput: 0: 11014.9. Samples: 80346282. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:48,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:49,432][626795] Updated weights for policy 0, policy_version 161312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:51,208][626795] Updated weights for policy 0, policy_version 161322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:53,011][626795] Updated weights for policy 0, policy_version 161332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42461.9, 300 sec: 43237.1). Total num frames: 1321672704. Throughput: 0: 11063.1. Samples: 80416710. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:53,979][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:54,686][626795] Updated weights for policy 0, policy_version 161342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:56,427][626795] Updated weights for policy 0, policy_version 161352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:58,105][626795] Updated weights for policy 0, policy_version 161362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:58,975][24592] Fps is (10 sec: 47514.8, 60 sec: 44762.1, 300 sec: 43320.4). Total num frames: 1321918464. Throughput: 0: 11070.5. Samples: 80452398. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:25:58,978][24592] Avg episode reward: [(0, '4.263')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:25:59,981][626795] Updated weights for policy 0, policy_version 161372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:01,615][626795] Updated weights for policy 0, policy_version 161382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:03,440][626795] Updated weights for policy 0, policy_version 161392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:03,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45056.5, 300 sec: 43292.6). Total num frames: 1322147840. Throughput: 0: 11027.1. Samples: 80522304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:03,978][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:05,225][626795] Updated weights for policy 0, policy_version 161402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:07,045][626795] Updated weights for policy 0, policy_version 161412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:10,106][24592] Fps is (10 sec: 33855.7, 60 sec: 42614.6, 300 sec: 42823.1). Total num frames: 1322295296. Throughput: 0: 9937.7. Samples: 80556744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:10,108][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:11,578][626795] Updated weights for policy 0, policy_version 161422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:13,435][626795] Updated weights for policy 0, policy_version 161432 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:13,975][24592] Fps is (10 sec: 32768.2, 60 sec: 42598.7, 300 sec: 42792.8). Total num frames: 1322475520. Throughput: 0: 10225.5. Samples: 80593416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:13,977][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:15,218][626795] Updated weights for policy 0, policy_version 161442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:17,026][626795] Updated weights for policy 0, policy_version 161452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:18,729][626795] Updated weights for policy 0, policy_version 161462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:18,975][24592] Fps is (10 sec: 46181.1, 60 sec: 42325.3, 300 sec: 42765.1). Total num frames: 1322704896. Throughput: 0: 11276.1. Samples: 80662134. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:18,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:20,421][626795] Updated weights for policy 0, policy_version 161472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:22,120][626795] Updated weights for policy 0, policy_version 161482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:23,799][626795] Updated weights for policy 0, policy_version 161492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:23,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42325.6, 300 sec: 43214.9). Total num frames: 1322942464. Throughput: 0: 10987.3. Samples: 80733876. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:23,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:25,512][626795] Updated weights for policy 0, policy_version 161502 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:27,230][626795] Updated weights for policy 0, policy_version 161512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:28,897][626795] Updated weights for policy 0, policy_version 161522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:28,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42598.4, 300 sec: 43292.7). Total num frames: 1323188224. Throughput: 0: 11045.1. Samples: 80770536. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:28,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:30,642][626795] Updated weights for policy 0, policy_version 161532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:32,338][626795] Updated weights for policy 0, policy_version 161542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:33,976][24592] Fps is (10 sec: 48331.4, 60 sec: 44704.2, 300 sec: 43348.2). Total num frames: 1323425792. Throughput: 0: 11023.3. Samples: 80842332. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:33,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:34,154][626795] Updated weights for policy 0, policy_version 161552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:35,860][626795] Updated weights for policy 0, policy_version 161562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:37,600][626795] Updated weights for policy 0, policy_version 161572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:38,976][24592] Fps is (10 sec: 46693.5, 60 sec: 44783.3, 300 sec: 43292.6). Total num frames: 1323655168. Throughput: 0: 10995.3. Samples: 80911500. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:38,978][24592] Avg episode reward: [(0, '4.384')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:39,381][626795] Updated weights for policy 0, policy_version 161582 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:41,140][626795] Updated weights for policy 0, policy_version 161592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:45,076][24592] Fps is (10 sec: 33948.3, 60 sec: 42635.6, 300 sec: 42827.4). Total num frames: 1323802624. Throughput: 0: 10698.2. Samples: 80945592. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:45,077][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:45,838][626795] Updated weights for policy 0, policy_version 161602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:47,679][626795] Updated weights for policy 0, policy_version 161612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:48,975][24592] Fps is (10 sec: 32768.5, 60 sec: 42325.5, 300 sec: 42792.8). Total num frames: 1323982848. Throughput: 0: 10216.8. Samples: 80982060. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:48,976][24592] Avg episode reward: [(0, '4.260')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:49,493][626795] Updated weights for policy 0, policy_version 161622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:51,294][626795] Updated weights for policy 0, policy_version 161632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:53,002][626795] Updated weights for policy 0, policy_version 161642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:53,975][24592] Fps is (10 sec: 46024.8, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1324212224. Throughput: 0: 11269.4. Samples: 81051126. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:53,978][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:54,816][626795] Updated weights for policy 0, policy_version 161652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:56,472][626795] Updated weights for policy 0, policy_version 161662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:58,064][626795] Updated weights for policy 0, policy_version 161672 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42188.8, 300 sec: 43217.9). Total num frames: 1324449792. Throughput: 0: 10961.1. Samples: 81086664. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:26:58,977][24592] Avg episode reward: [(0, '4.312')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:26:59,822][626795] Updated weights for policy 0, policy_version 161682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:01,559][626795] Updated weights for policy 0, policy_version 161692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:03,200][626795] Updated weights for policy 0, policy_version 161702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:03,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42461.9, 300 sec: 43292.7). Total num frames: 1324695552. Throughput: 0: 11047.6. Samples: 81159276. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:03,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000161706_1324695552.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000160445_1314365440.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:05,005][626795] Updated weights for policy 0, policy_version 161712 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:06,642][626795] Updated weights for policy 0, policy_version 161722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:08,416][626795] Updated weights for policy 0, policy_version 161732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 44808.0, 300 sec: 43348.2). Total num frames: 1324933120. Throughput: 0: 11036.3. Samples: 81230508. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:08,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:10,284][626795] Updated weights for policy 0, policy_version 161742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:11,858][626795] Updated weights for policy 0, policy_version 161752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:13,697][626795] Updated weights for policy 0, policy_version 161762 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44782.9, 300 sec: 43320.4). Total num frames: 1325162496. Throughput: 0: 10984.0. Samples: 81264816. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:13,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:15,541][626795] Updated weights for policy 0, policy_version 161772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:20,006][24592] Fps is (10 sec: 34162.7, 60 sec: 42684.5, 300 sec: 42837.5). Total num frames: 1325309952. Throughput: 0: 9936.0. Samples: 81299688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:20,010][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:20,040][626795] Updated weights for policy 0, policy_version 161782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:21,880][626795] Updated weights for policy 0, policy_version 161792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:23,767][626795] Updated weights for policy 0, policy_version 161802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:23,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1325490176. Throughput: 0: 10198.0. Samples: 81370410. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:23,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:25,420][626795] Updated weights for policy 0, policy_version 161812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:27,159][626795] Updated weights for policy 0, policy_version 161822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:28,975][24592] Fps is (10 sec: 45666.2, 60 sec: 42188.8, 300 sec: 42765.0). Total num frames: 1325719552. Throughput: 0: 10464.9. Samples: 81404994. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:28,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:29,008][626795] Updated weights for policy 0, policy_version 161832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:30,723][626795] Updated weights for policy 0, policy_version 161842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:32,304][626795] Updated weights for policy 0, policy_version 161852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:33,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42325.5, 300 sec: 43252.8). Total num frames: 1325965312. Throughput: 0: 10994.5. Samples: 81476814. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:33,977][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:34,097][626795] Updated weights for policy 0, policy_version 161862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:35,790][626795] Updated weights for policy 0, policy_version 161872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:37,429][626795] Updated weights for policy 0, policy_version 161882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:38,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42462.0, 300 sec: 43320.4). Total num frames: 1326202880. Throughput: 0: 11070.9. Samples: 81549318. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:38,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:39,151][626795] Updated weights for policy 0, policy_version 161892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:40,871][626795] Updated weights for policy 0, policy_version 161902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:42,589][626795] Updated weights for policy 0, policy_version 161912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44785.1, 300 sec: 43348.2). Total num frames: 1326440448. Throughput: 0: 11075.7. Samples: 81585072. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:43,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:44,365][626795] Updated weights for policy 0, policy_version 161922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:46,181][626795] Updated weights for policy 0, policy_version 161932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:47,867][626795] Updated weights for policy 0, policy_version 161942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:48,976][24592] Fps is (10 sec: 47512.8, 60 sec: 44919.4, 300 sec: 43348.1). Total num frames: 1326678016. Throughput: 0: 11018.8. Samples: 81655122. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:48,978][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:49,684][626795] Updated weights for policy 0, policy_version 161952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:51,553][626795] Updated weights for policy 0, policy_version 161962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:54,942][24592] Fps is (10 sec: 34360.3, 60 sec: 42728.9, 300 sec: 42902.1). Total num frames: 1326817280. Throughput: 0: 9982.9. Samples: 81689394. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:54,943][24592] Avg episode reward: [(0, '4.922')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:56,102][626795] Updated weights for policy 0, policy_version 161972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:57,829][626795] Updated weights for policy 0, policy_version 161982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:58,975][24592] Fps is (10 sec: 31949.1, 60 sec: 42461.8, 300 sec: 42820.5). Total num frames: 1326997504. Throughput: 0: 10222.3. Samples: 81724818. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:27:58,977][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:27:59,667][626795] Updated weights for policy 0, policy_version 161992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:01,402][626795] Updated weights for policy 0, policy_version 162002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:03,172][626795] Updated weights for policy 0, policy_version 162012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:03,975][24592] Fps is (10 sec: 47159.3, 60 sec: 42461.9, 300 sec: 42876.2). Total num frames: 1327243264. Throughput: 0: 11280.6. Samples: 81795690. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:03,977][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:04,812][626795] Updated weights for policy 0, policy_version 162022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:06,486][626795] Updated weights for policy 0, policy_version 162032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:08,181][626795] Updated weights for policy 0, policy_version 162042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:08,975][24592] Fps is (10 sec: 48333.2, 60 sec: 42461.9, 300 sec: 43319.8). Total num frames: 1327480832. Throughput: 0: 11050.1. Samples: 81867666. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:08,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:09,953][626795] Updated weights for policy 0, policy_version 162052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:11,635][626795] Updated weights for policy 0, policy_version 162062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:13,323][626795] Updated weights for policy 0, policy_version 162072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:13,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42598.4, 300 sec: 43376.0). Total num frames: 1327718400. Throughput: 0: 11072.8. Samples: 81903270. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:13,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:15,104][626795] Updated weights for policy 0, policy_version 162082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:16,846][626795] Updated weights for policy 0, policy_version 162092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:18,621][626795] Updated weights for policy 0, policy_version 162102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:18,976][24592] Fps is (10 sec: 46693.9, 60 sec: 44732.0, 300 sec: 43403.7). Total num frames: 1327947776. Throughput: 0: 11065.6. Samples: 81974766. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:18,976][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:20,409][626795] Updated weights for policy 0, policy_version 162112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:22,181][626795] Updated weights for policy 0, policy_version 162122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:23,853][626795] Updated weights for policy 0, policy_version 162132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:23,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44919.4, 300 sec: 43403.7). Total num frames: 1328185344. Throughput: 0: 11000.7. Samples: 82044348. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:23,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:25,759][626795] Updated weights for policy 0, policy_version 162142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:29,883][24592] Fps is (10 sec: 34548.5, 60 sec: 42770.7, 300 sec: 42938.4). Total num frames: 1328324608. Throughput: 0: 10728.9. Samples: 82077606. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:29,888][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:30,287][626795] Updated weights for policy 0, policy_version 162152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:32,175][626795] Updated weights for policy 0, policy_version 162162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:33,955][626795] Updated weights for policy 0, policy_version 162172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:33,976][24592] Fps is (10 sec: 32767.0, 60 sec: 42461.6, 300 sec: 42903.8). Total num frames: 1328513024. Throughput: 0: 10175.7. Samples: 82113030. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:33,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:35,859][626795] Updated weights for policy 0, policy_version 162182 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:37,540][626795] Updated weights for policy 0, policy_version 162192 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:38,975][24592] Fps is (10 sec: 45948.7, 60 sec: 42325.3, 300 sec: 42876.2). Total num frames: 1328742400. Throughput: 0: 11231.9. Samples: 82183968. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:38,976][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:39,133][626795] Updated weights for policy 0, policy_version 162202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:40,928][626795] Updated weights for policy 0, policy_version 162212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:42,663][626795] Updated weights for policy 0, policy_version 162222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:43,975][24592] Fps is (10 sec: 46695.8, 60 sec: 42325.3, 300 sec: 43306.1). Total num frames: 1328979968. Throughput: 0: 10985.2. Samples: 82219152. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:43,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:44,423][626795] Updated weights for policy 0, policy_version 162232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:45,248][626772] Signal inference workers to stop experience collection... (1200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:45,254][626772] Signal inference workers to resume experience collection... (1200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:45,266][626795] InferenceWorker_p0-w0: stopping experience collection (1200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:45,272][626795] InferenceWorker_p0-w0: resuming experience collection (1200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:46,002][626795] Updated weights for policy 0, policy_version 162242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:47,842][626795] Updated weights for policy 0, policy_version 162252 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:48,976][24592] Fps is (10 sec: 47512.2, 60 sec: 42325.2, 300 sec: 43348.1). Total num frames: 1329217536. Throughput: 0: 11003.4. Samples: 82290846. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:48,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:49,439][626795] Updated weights for policy 0, policy_version 162262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:51,157][626795] Updated weights for policy 0, policy_version 162272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:52,866][626795] Updated weights for policy 0, policy_version 162282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:53,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44684.0, 300 sec: 43376.0). Total num frames: 1329455104. Throughput: 0: 11000.7. Samples: 82362696. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:53,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:54,706][626795] Updated weights for policy 0, policy_version 162292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:56,392][626795] Updated weights for policy 0, policy_version 162302 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:58,147][626795] Updated weights for policy 0, policy_version 162312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:58,977][24592] Fps is (10 sec: 47506.9, 60 sec: 44918.2, 300 sec: 43403.6). Total num frames: 1329692672. Throughput: 0: 10992.3. Samples: 82397940. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:28:58,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:28:59,884][626795] Updated weights for policy 0, policy_version 162322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:01,884][626795] Updated weights for policy 0, policy_version 162332 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:04,811][24592] Fps is (10 sec: 34776.4, 60 sec: 42551.7, 300 sec: 42921.1). Total num frames: 1329831936. Throughput: 0: 10001.1. Samples: 82433172. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:04,812][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:04,822][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000162333_1329831936.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:04,906][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000161066_1319452672.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:06,542][626795] Updated weights for policy 0, policy_version 162342 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:08,539][626795] Updated weights for policy 0, policy_version 162352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:08,976][24592] Fps is (10 sec: 31132.9, 60 sec: 42051.8, 300 sec: 42792.7). Total num frames: 1330003968. Throughput: 0: 10115.6. Samples: 82499556. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:08,978][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:10,229][626795] Updated weights for policy 0, policy_version 162362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:11,963][626795] Updated weights for policy 0, policy_version 162372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:13,682][626795] Updated weights for policy 0, policy_version 162382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:13,976][24592] Fps is (10 sec: 44694.9, 60 sec: 42052.1, 300 sec: 42820.6). Total num frames: 1330241536. Throughput: 0: 10370.7. Samples: 82534878. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:13,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:15,446][626795] Updated weights for policy 0, policy_version 162392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:17,134][626795] Updated weights for policy 0, policy_version 162402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:18,818][626795] Updated weights for policy 0, policy_version 162412 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:18,975][24592] Fps is (10 sec: 47516.3, 60 sec: 42188.8, 300 sec: 43235.3). Total num frames: 1330479104. Throughput: 0: 10961.3. Samples: 82606284. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:18,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:20,484][626795] Updated weights for policy 0, policy_version 162422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:22,278][626795] Updated weights for policy 0, policy_version 162432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:23,962][626795] Updated weights for policy 0, policy_version 162442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:23,976][24592] Fps is (10 sec: 48329.7, 60 sec: 42324.8, 300 sec: 43320.3). Total num frames: 1330724864. Throughput: 0: 10985.0. Samples: 82678302. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:23,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:25,612][626795] Updated weights for policy 0, policy_version 162452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:27,320][626795] Updated weights for policy 0, policy_version 162462 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:28,975][24592] Fps is (10 sec: 48333.1, 60 sec: 44638.8, 300 sec: 43348.2). Total num frames: 1330962432. Throughput: 0: 11009.6. Samples: 82714584. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:28,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:29,075][626795] Updated weights for policy 0, policy_version 162472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:30,772][626795] Updated weights for policy 0, policy_version 162482 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:32,488][626795] Updated weights for policy 0, policy_version 162492 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:33,975][24592] Fps is (10 sec: 47517.1, 60 sec: 44783.1, 300 sec: 43348.2). Total num frames: 1331200000. Throughput: 0: 10998.2. Samples: 82785762. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:33,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:34,245][626795] Updated weights for policy 0, policy_version 162502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:36,168][626795] Updated weights for policy 0, policy_version 162512 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:39,769][24592] Fps is (10 sec: 34152.7, 60 sec: 42581.1, 300 sec: 42844.1). Total num frames: 1331331072. Throughput: 0: 9977.6. Samples: 82819608. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:39,771][24592] Avg episode reward: [(0, '4.327')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:40,837][626795] Updated weights for policy 0, policy_version 162522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:42,735][626795] Updated weights for policy 0, policy_version 162532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:43,976][24592] Fps is (10 sec: 31127.7, 60 sec: 42188.3, 300 sec: 42764.9). Total num frames: 1331511296. Throughput: 0: 10143.4. Samples: 82854384. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:43,978][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:44,622][626795] Updated weights for policy 0, policy_version 162542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:46,543][626795] Updated weights for policy 0, policy_version 162552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:48,232][626795] Updated weights for policy 0, policy_version 162562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:48,975][24592] Fps is (10 sec: 44492.2, 60 sec: 42052.5, 300 sec: 42765.0). Total num frames: 1331740672. Throughput: 0: 11049.9. Samples: 82921182. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:48,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:49,999][626795] Updated weights for policy 0, policy_version 162572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:51,904][626795] Updated weights for policy 0, policy_version 162582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:53,636][626795] Updated weights for policy 0, policy_version 162592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:53,975][24592] Fps is (10 sec: 45878.5, 60 sec: 41915.7, 300 sec: 43170.6). Total num frames: 1331970048. Throughput: 0: 10878.7. Samples: 82989090. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:53,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:55,497][626795] Updated weights for policy 0, policy_version 162602 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:57,293][626795] Updated weights for policy 0, policy_version 162612 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:29:58,962][626795] Updated weights for policy 0, policy_version 162622 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:58,975][24592] Fps is (10 sec: 45875.2, 60 sec: 41780.4, 300 sec: 43237.2). Total num frames: 1332199424. Throughput: 0: 10835.1. Samples: 83022456. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:29:58,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:00,830][626795] Updated weights for policy 0, policy_version 162632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:02,522][626795] Updated weights for policy 0, policy_version 162642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 43754.0, 300 sec: 43153.8). Total num frames: 1332420608. Throughput: 0: 10806.5. Samples: 83092578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:03,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:04,320][626795] Updated weights for policy 0, policy_version 162652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:06,183][626795] Updated weights for policy 0, policy_version 162662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:07,935][626795] Updated weights for policy 0, policy_version 162672 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:08,975][24592] Fps is (10 sec: 45055.6, 60 sec: 44100.7, 300 sec: 43153.8). Total num frames: 1332649984. Throughput: 0: 10737.2. Samples: 83161470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:08,978][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:09,802][626795] Updated weights for policy 0, policy_version 162682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:11,562][626795] Updated weights for policy 0, policy_version 162692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:13,975][24592] Fps is (10 sec: 37683.5, 60 sec: 42598.5, 300 sec: 42820.6). Total num frames: 1332797440. Throughput: 0: 10675.3. Samples: 83194974. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:13,976][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:15,153][626795] Updated weights for policy 0, policy_version 162702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:16,943][626795] Updated weights for policy 0, policy_version 162712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:18,731][626795] Updated weights for policy 0, policy_version 162722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:18,975][24592] Fps is (10 sec: 37683.2, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1333026816. Throughput: 0: 10168.1. Samples: 83243328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:18,976][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:20,444][626795] Updated weights for policy 0, policy_version 162732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:22,277][626795] Updated weights for policy 0, policy_version 162742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:23,946][626795] Updated weights for policy 0, policy_version 162752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:23,976][24592] Fps is (10 sec: 46692.6, 60 sec: 42325.7, 300 sec: 42820.5). Total num frames: 1333264384. Throughput: 0: 11160.9. Samples: 83312994. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:23,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:25,680][626795] Updated weights for policy 0, policy_version 162762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:27,278][626795] Updated weights for policy 0, policy_version 162772 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:28,976][24592] Fps is (10 sec: 47509.6, 60 sec: 42324.7, 300 sec: 43242.9). Total num frames: 1333501952. Throughput: 0: 11005.9. Samples: 83349654. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:28,990][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:29,056][626795] Updated weights for policy 0, policy_version 162782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:30,711][626795] Updated weights for policy 0, policy_version 162792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:32,470][626795] Updated weights for policy 0, policy_version 162802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:33,975][24592] Fps is (10 sec: 47515.5, 60 sec: 42325.4, 300 sec: 43292.7). Total num frames: 1333739520. Throughput: 0: 11130.0. Samples: 83422032. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:33,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:34,095][626795] Updated weights for policy 0, policy_version 162812 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:35,864][626795] Updated weights for policy 0, policy_version 162822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:37,522][626795] Updated weights for policy 0, policy_version 162832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:38,975][24592] Fps is (10 sec: 48337.1, 60 sec: 44829.9, 300 sec: 43348.2). Total num frames: 1333985280. Throughput: 0: 11226.3. Samples: 83494272. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:38,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:39,270][626795] Updated weights for policy 0, policy_version 162842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:40,920][626795] Updated weights for policy 0, policy_version 162852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:42,701][626795] Updated weights for policy 0, policy_version 162862 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:43,976][24592] Fps is (10 sec: 48330.1, 60 sec: 45192.7, 300 sec: 43320.4). Total num frames: 1334222848. Throughput: 0: 11273.7. Samples: 83529780. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:43,978][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:44,543][626795] Updated weights for policy 0, policy_version 162872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:48,977][24592] Fps is (10 sec: 31943.5, 60 sec: 42733.7, 300 sec: 42820.3). Total num frames: 1334304768. Throughput: 0: 10461.8. Samples: 83563374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:48,980][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:49,408][626795] Updated weights for policy 0, policy_version 162882 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:51,337][626795] Updated weights for policy 0, policy_version 162892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:53,212][626795] Updated weights for policy 0, policy_version 162902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:53,975][24592] Fps is (10 sec: 29492.8, 60 sec: 42461.9, 300 sec: 42709.5). Total num frames: 1334517760. Throughput: 0: 10371.1. Samples: 83628168. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:53,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:55,119][626795] Updated weights for policy 0, policy_version 162912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:56,943][626795] Updated weights for policy 0, policy_version 162922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:30:58,683][626795] Updated weights for policy 0, policy_version 162932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:58,975][24592] Fps is (10 sec: 44244.4, 60 sec: 42461.9, 300 sec: 42709.5). Total num frames: 1334747136. Throughput: 0: 10375.9. Samples: 83661888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:30:58,976][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:00,506][626795] Updated weights for policy 0, policy_version 162942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:02,208][626795] Updated weights for policy 0, policy_version 162952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:03,946][626795] Updated weights for policy 0, policy_version 162962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:03,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42735.0, 300 sec: 43180.4). Total num frames: 1334984704. Throughput: 0: 10832.8. Samples: 83730804. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:03,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000162962_1334984704.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000161706_1324695552.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:05,734][626795] Updated weights for policy 0, policy_version 162972 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:07,526][626795] Updated weights for policy 0, policy_version 162982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:08,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42871.5, 300 sec: 43209.3). Total num frames: 1335222272. Throughput: 0: 10861.7. Samples: 83801766. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:08,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:09,241][626795] Updated weights for policy 0, policy_version 162992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:10,952][626795] Updated weights for policy 0, policy_version 163002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:12,718][626795] Updated weights for policy 0, policy_version 163012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44236.8, 300 sec: 43209.3). Total num frames: 1335451648. Throughput: 0: 10819.8. Samples: 83836536. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:13,976][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:14,426][626795] Updated weights for policy 0, policy_version 163022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:16,110][626795] Updated weights for policy 0, policy_version 163032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:17,842][626795] Updated weights for policy 0, policy_version 163042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:18,975][24592] Fps is (10 sec: 46693.7, 60 sec: 44373.3, 300 sec: 43209.3). Total num frames: 1335689216. Throughput: 0: 10824.9. Samples: 83909154. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:18,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:19,428][626795] Updated weights for policy 0, policy_version 163052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:23,549][626795] Updated weights for policy 0, policy_version 163062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:23,976][24592] Fps is (10 sec: 36861.4, 60 sec: 42598.1, 300 sec: 42820.4). Total num frames: 1335820288. Throughput: 0: 10188.2. Samples: 83952750. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:23,978][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:25,300][626795] Updated weights for policy 0, policy_version 163072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:27,092][626795] Updated weights for policy 0, policy_version 163082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:28,843][626795] Updated weights for policy 0, policy_version 163092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:28,975][24592] Fps is (10 sec: 36045.5, 60 sec: 42462.6, 300 sec: 42792.8). Total num frames: 1336049664. Throughput: 0: 10166.1. Samples: 83987250. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:28,976][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:30,592][626795] Updated weights for policy 0, policy_version 163102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:32,381][626795] Updated weights for policy 0, policy_version 163112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:33,976][24592] Fps is (10 sec: 45877.1, 60 sec: 42325.1, 300 sec: 42792.8). Total num frames: 1336279040. Throughput: 0: 10974.9. Samples: 84057228. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:33,978][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:34,149][626795] Updated weights for policy 0, policy_version 163122 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:35,971][626795] Updated weights for policy 0, policy_version 163132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:37,668][626795] Updated weights for policy 0, policy_version 163142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:38,975][24592] Fps is (10 sec: 46693.5, 60 sec: 42188.7, 300 sec: 43259.6). Total num frames: 1336516608. Throughput: 0: 11088.8. Samples: 84127164. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:38,976][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:39,422][626795] Updated weights for policy 0, policy_version 163152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:41,171][626795] Updated weights for policy 0, policy_version 163162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:42,914][626795] Updated weights for policy 0, policy_version 163172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:43,976][24592] Fps is (10 sec: 47513.5, 60 sec: 42188.9, 300 sec: 43292.6). Total num frames: 1336754176. Throughput: 0: 11125.8. Samples: 84162552. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:43,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:44,499][626795] Updated weights for policy 0, policy_version 163182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:46,361][626795] Updated weights for policy 0, policy_version 163192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:48,068][626795] Updated weights for policy 0, policy_version 163202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:48,975][24592] Fps is (10 sec: 47514.3, 60 sec: 44784.2, 300 sec: 43320.4). Total num frames: 1336991744. Throughput: 0: 11160.8. Samples: 84233040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:48,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:49,858][626795] Updated weights for policy 0, policy_version 163212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:51,675][626795] Updated weights for policy 0, policy_version 163222 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:53,393][626795] Updated weights for policy 0, policy_version 163232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:53,975][24592] Fps is (10 sec: 46696.4, 60 sec: 45056.1, 300 sec: 43292.6). Total num frames: 1337221120. Throughput: 0: 11136.9. Samples: 84302928. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:53,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:55,210][626795] Updated weights for policy 0, policy_version 163242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:58,976][24592] Fps is (10 sec: 34406.2, 60 sec: 43144.5, 300 sec: 42848.3). Total num frames: 1337335808. Throughput: 0: 10892.4. Samples: 84326694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:31:58,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:31:59,419][626795] Updated weights for policy 0, policy_version 163252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:01,336][626795] Updated weights for policy 0, policy_version 163262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:03,042][626795] Updated weights for policy 0, policy_version 163272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:03,976][24592] Fps is (10 sec: 34405.9, 60 sec: 43008.0, 300 sec: 42820.6). Total num frames: 1337565184. Throughput: 0: 10397.6. Samples: 84377046. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:03,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:04,832][626795] Updated weights for policy 0, policy_version 163282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:06,614][626795] Updated weights for policy 0, policy_version 163292 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:08,262][626795] Updated weights for policy 0, policy_version 163302 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:08,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42871.5, 300 sec: 42820.6). Total num frames: 1337794560. Throughput: 0: 10970.5. Samples: 84446412. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:08,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:10,123][626795] Updated weights for policy 0, policy_version 163312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:11,860][626795] Updated weights for policy 0, policy_version 163322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:13,582][626795] Updated weights for policy 0, policy_version 163332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:13,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42871.5, 300 sec: 43249.3). Total num frames: 1338023936. Throughput: 0: 10978.7. Samples: 84481290. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:13,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:15,376][626795] Updated weights for policy 0, policy_version 163342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:17,020][626795] Updated weights for policy 0, policy_version 163352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:18,769][626795] Updated weights for policy 0, policy_version 163362 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:18,975][24592] Fps is (10 sec: 47513.5, 60 sec: 43008.1, 300 sec: 43320.4). Total num frames: 1338269696. Throughput: 0: 10991.1. Samples: 84551826. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:18,979][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:20,579][626795] Updated weights for policy 0, policy_version 163372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:22,374][626795] Updated weights for policy 0, policy_version 163382 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:23,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44647.0, 300 sec: 43320.4). Total num frames: 1338499072. Throughput: 0: 10999.6. Samples: 84622146. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:23,976][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:24,144][626795] Updated weights for policy 0, policy_version 163392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:25,897][626795] Updated weights for policy 0, policy_version 163402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:27,616][626795] Updated weights for policy 0, policy_version 163412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44782.9, 300 sec: 43292.6). Total num frames: 1338736640. Throughput: 0: 10979.3. Samples: 84656616. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:28,978][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:29,436][626795] Updated weights for policy 0, policy_version 163422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:33,775][626795] Updated weights for policy 0, policy_version 163432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:33,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42871.7, 300 sec: 42876.1). Total num frames: 1338851328. Throughput: 0: 10416.9. Samples: 84701802. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:33,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:35,484][626795] Updated weights for policy 0, policy_version 163442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:37,171][626795] Updated weights for policy 0, policy_version 163452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:38,944][626795] Updated weights for policy 0, policy_version 163462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:38,975][24592] Fps is (10 sec: 34406.2, 60 sec: 42735.0, 300 sec: 42848.3). Total num frames: 1339080704. Throughput: 0: 10321.7. Samples: 84767406. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:38,977][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:40,737][626795] Updated weights for policy 0, policy_version 163472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:42,523][626795] Updated weights for policy 0, policy_version 163482 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42598.7, 300 sec: 42820.6). Total num frames: 1339310080. Throughput: 0: 10561.5. Samples: 84801960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:43,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:44,275][626795] Updated weights for policy 0, policy_version 163492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:45,993][626795] Updated weights for policy 0, policy_version 163502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:47,749][626795] Updated weights for policy 0, policy_version 163512 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:48,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42461.9, 300 sec: 43267.9). Total num frames: 1339539456. Throughput: 0: 10991.2. Samples: 84871650. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:48,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:49,534][626795] Updated weights for policy 0, policy_version 163522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:51,296][626795] Updated weights for policy 0, policy_version 163532 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:53,033][626795] Updated weights for policy 0, policy_version 163542 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42598.3, 300 sec: 43320.4). Total num frames: 1339777024. Throughput: 0: 11012.2. Samples: 84941964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:53,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:54,873][626795] Updated weights for policy 0, policy_version 163552 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:56,480][626795] Updated weights for policy 0, policy_version 163562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:32:58,236][626795] Updated weights for policy 0, policy_version 163572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:58,975][24592] Fps is (10 sec: 46694.0, 60 sec: 44509.9, 300 sec: 43264.9). Total num frames: 1340006400. Throughput: 0: 11026.0. Samples: 84977460. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:32:58,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:00,059][626795] Updated weights for policy 0, policy_version 163582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:01,748][626795] Updated weights for policy 0, policy_version 163592 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:03,529][626795] Updated weights for policy 0, policy_version 163602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:03,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44646.4, 300 sec: 43264.9). Total num frames: 1340243968. Throughput: 0: 11005.1. Samples: 85047054. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:03,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000163604_1340243968.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:04,060][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000162333_1329831936.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:07,905][626795] Updated weights for policy 0, policy_version 163612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:08,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42598.3, 300 sec: 42820.5). Total num frames: 1340350464. Throughput: 0: 10316.5. Samples: 85086390. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:08,978][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:09,742][626795] Updated weights for policy 0, policy_version 163622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:11,401][626795] Updated weights for policy 0, policy_version 163632 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:13,361][626795] Updated weights for policy 0, policy_version 163642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:13,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42598.4, 300 sec: 42820.6). Total num frames: 1340579840. Throughput: 0: 10327.2. Samples: 85121340. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:13,976][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:14,972][626795] Updated weights for policy 0, policy_version 163652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:16,743][626795] Updated weights for policy 0, policy_version 163662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:18,566][626795] Updated weights for policy 0, policy_version 163672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:18,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42461.9, 300 sec: 42820.6). Total num frames: 1340817408. Throughput: 0: 10851.9. Samples: 85190136. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:18,978][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:20,282][626795] Updated weights for policy 0, policy_version 163682 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:22,112][626795] Updated weights for policy 0, policy_version 163692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:23,820][626795] Updated weights for policy 0, policy_version 163702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:23,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.4, 300 sec: 43286.9). Total num frames: 1341054976. Throughput: 0: 10962.7. Samples: 85260726. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:23,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:25,512][626795] Updated weights for policy 0, policy_version 163712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:27,321][626795] Updated weights for policy 0, policy_version 163722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:28,975][24592] Fps is (10 sec: 46693.9, 60 sec: 42461.8, 300 sec: 43292.7). Total num frames: 1341284352. Throughput: 0: 10978.5. Samples: 85295994. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:28,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:29,035][626795] Updated weights for policy 0, policy_version 163732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:30,723][626795] Updated weights for policy 0, policy_version 163742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:32,588][626795] Updated weights for policy 0, policy_version 163752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:33,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44373.3, 300 sec: 43292.6). Total num frames: 1341513728. Throughput: 0: 10980.1. Samples: 85365756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:33,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:34,248][626795] Updated weights for policy 0, policy_version 163762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:36,125][626795] Updated weights for policy 0, policy_version 163772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:37,824][626795] Updated weights for policy 0, policy_version 163782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:38,976][24592] Fps is (10 sec: 46690.8, 60 sec: 44509.3, 300 sec: 43292.5). Total num frames: 1341751296. Throughput: 0: 10969.8. Samples: 85435614. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:38,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:39,661][626795] Updated weights for policy 0, policy_version 163792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:43,829][626795] Updated weights for policy 0, policy_version 163802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:43,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42598.4, 300 sec: 42876.1). Total num frames: 1341865984. Throughput: 0: 10529.0. Samples: 85451262. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:43,978][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:45,677][626795] Updated weights for policy 0, policy_version 163812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:47,451][626795] Updated weights for policy 0, policy_version 163822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:48,975][24592] Fps is (10 sec: 34409.1, 60 sec: 42598.3, 300 sec: 42848.3). Total num frames: 1342095360. Throughput: 0: 10307.0. Samples: 85510872. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:48,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:49,230][626795] Updated weights for policy 0, policy_version 163832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:50,979][626795] Updated weights for policy 0, policy_version 163842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:52,780][626795] Updated weights for policy 0, policy_version 163852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:53,976][24592] Fps is (10 sec: 46693.0, 60 sec: 42598.2, 300 sec: 42848.5). Total num frames: 1342332928. Throughput: 0: 10966.3. Samples: 85579878. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:53,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:54,511][626795] Updated weights for policy 0, policy_version 163862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:56,336][626795] Updated weights for policy 0, policy_version 163872 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:57,989][626795] Updated weights for policy 0, policy_version 163882 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:58,976][24592] Fps is (10 sec: 46693.1, 60 sec: 42598.2, 300 sec: 43276.4). Total num frames: 1342562304. Throughput: 0: 10959.4. Samples: 85614516. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:33:58,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:33:59,814][626795] Updated weights for policy 0, policy_version 163892 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:01,499][626795] Updated weights for policy 0, policy_version 163902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:03,275][626795] Updated weights for policy 0, policy_version 163912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:03,975][24592] Fps is (10 sec: 46695.7, 60 sec: 42598.4, 300 sec: 43376.0). Total num frames: 1342799872. Throughput: 0: 11004.5. Samples: 85685340. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:03,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:05,093][626795] Updated weights for policy 0, policy_version 163922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:06,829][626795] Updated weights for policy 0, policy_version 163932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:08,507][626795] Updated weights for policy 0, policy_version 163942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:08,975][24592] Fps is (10 sec: 46696.0, 60 sec: 44646.4, 300 sec: 43348.2). Total num frames: 1343029248. Throughput: 0: 10984.5. Samples: 85755030. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:08,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:10,342][626795] Updated weights for policy 0, policy_version 163952 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:12,140][626795] Updated weights for policy 0, policy_version 163962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:13,826][626795] Updated weights for policy 0, policy_version 163972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44782.9, 300 sec: 43348.2). Total num frames: 1343266816. Throughput: 0: 10975.5. Samples: 85789890. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:13,977][24592] Avg episode reward: [(0, '4.907')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:18,075][626795] Updated weights for policy 0, policy_version 163982 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:18,975][24592] Fps is (10 sec: 34406.5, 60 sec: 42598.4, 300 sec: 42876.2). Total num frames: 1343373312. Throughput: 0: 10327.5. Samples: 85830492. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:18,976][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:19,951][626795] Updated weights for policy 0, policy_version 163992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:21,698][626795] Updated weights for policy 0, policy_version 164002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:23,443][626795] Updated weights for policy 0, policy_version 164012 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:23,975][24592] Fps is (10 sec: 33587.1, 60 sec: 42461.9, 300 sec: 42848.3). Total num frames: 1343602688. Throughput: 0: 10302.5. Samples: 85899216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:23,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:25,189][626795] Updated weights for policy 0, policy_version 164022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:27,062][626795] Updated weights for policy 0, policy_version 164032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:28,798][626795] Updated weights for policy 0, policy_version 164042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:28,976][24592] Fps is (10 sec: 46693.3, 60 sec: 42598.3, 300 sec: 42848.3). Total num frames: 1343840256. Throughput: 0: 10726.1. Samples: 85933938. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:28,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:30,572][626795] Updated weights for policy 0, policy_version 164052 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:32,169][626795] Updated weights for policy 0, policy_version 164062 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:33,956][626795] Updated weights for policy 0, policy_version 164072 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:33,977][24592] Fps is (10 sec: 47506.5, 60 sec: 42733.9, 300 sec: 43325.7). Total num frames: 1344077824. Throughput: 0: 10960.3. Samples: 86004102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:33,978][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:35,752][626795] Updated weights for policy 0, policy_version 164082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:37,432][626795] Updated weights for policy 0, policy_version 164092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:38,975][24592] Fps is (10 sec: 46695.5, 60 sec: 42599.0, 300 sec: 43376.1). Total num frames: 1344307200. Throughput: 0: 11004.7. Samples: 86075088. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:38,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:39,238][626795] Updated weights for policy 0, policy_version 164102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:40,996][626795] Updated weights for policy 0, policy_version 164112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:42,773][626795] Updated weights for policy 0, policy_version 164122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:43,975][24592] Fps is (10 sec: 46701.3, 60 sec: 44646.4, 300 sec: 43403.7). Total num frames: 1344544768. Throughput: 0: 11007.6. Samples: 86109852. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:43,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:44,413][626795] Updated weights for policy 0, policy_version 164132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:46,302][626795] Updated weights for policy 0, policy_version 164142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:48,061][626795] Updated weights for policy 0, policy_version 164152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:48,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44646.4, 300 sec: 43403.7). Total num frames: 1344774144. Throughput: 0: 10974.7. Samples: 86179200. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:48,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:52,409][626795] Updated weights for policy 0, policy_version 164162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:53,976][24592] Fps is (10 sec: 34405.0, 60 sec: 42598.3, 300 sec: 43014.9). Total num frames: 1344888832. Throughput: 0: 10330.2. Samples: 86219892. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:53,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:54,108][626795] Updated weights for policy 0, policy_version 164172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:55,896][626795] Updated weights for policy 0, policy_version 164182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:57,580][626795] Updated weights for policy 0, policy_version 164192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:58,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42598.6, 300 sec: 43042.7). Total num frames: 1345118208. Throughput: 0: 10323.2. Samples: 86254434. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:34:58,978][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:34:59,434][626795] Updated weights for policy 0, policy_version 164202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:01,239][626795] Updated weights for policy 0, policy_version 164212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:02,903][626795] Updated weights for policy 0, policy_version 164222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:03,975][24592] Fps is (10 sec: 45877.3, 60 sec: 42461.9, 300 sec: 43042.7). Total num frames: 1345347584. Throughput: 0: 10965.3. Samples: 86323932. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:03,976][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:04,005][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000164228_1345355776.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:04,052][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000162962_1334984704.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:04,771][626795] Updated weights for policy 0, policy_version 164232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:06,571][626795] Updated weights for policy 0, policy_version 164242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:08,190][626795] Updated weights for policy 0, policy_version 164252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:08,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42598.4, 300 sec: 43348.2). Total num frames: 1345585152. Throughput: 0: 10991.6. Samples: 86393838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:08,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:10,031][626795] Updated weights for policy 0, policy_version 164262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:11,771][626795] Updated weights for policy 0, policy_version 164272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:13,416][626795] Updated weights for policy 0, policy_version 164282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:13,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42598.4, 300 sec: 43376.0). Total num frames: 1345822720. Throughput: 0: 10998.3. Samples: 86428860. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:13,976][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:15,231][626795] Updated weights for policy 0, policy_version 164292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:17,065][626795] Updated weights for policy 0, policy_version 164302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:18,863][626795] Updated weights for policy 0, policy_version 164312 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:18,976][24592] Fps is (10 sec: 46693.9, 60 sec: 44646.3, 300 sec: 43348.2). Total num frames: 1346052096. Throughput: 0: 10986.9. Samples: 86498496. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:18,977][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:20,410][626795] Updated weights for policy 0, policy_version 164322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:22,280][626795] Updated weights for policy 0, policy_version 164332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:23,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44646.4, 300 sec: 43320.5). Total num frames: 1346281472. Throughput: 0: 10957.6. Samples: 86568180. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:23,977][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:24,141][626795] Updated weights for policy 0, policy_version 164342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:28,379][626795] Updated weights for policy 0, policy_version 164352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:28,975][24592] Fps is (10 sec: 34406.8, 60 sec: 42598.5, 300 sec: 42903.9). Total num frames: 1346396160. Throughput: 0: 10348.0. Samples: 86575512. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:28,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:30,152][626795] Updated weights for policy 0, policy_version 164362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:31,961][626795] Updated weights for policy 0, policy_version 164372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:33,736][626795] Updated weights for policy 0, policy_version 164382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:33,975][24592] Fps is (10 sec: 34406.3, 60 sec: 42462.9, 300 sec: 42848.3). Total num frames: 1346625536. Throughput: 0: 10301.2. Samples: 86642754. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:33,976][24592] Avg episode reward: [(0, '4.363')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:35,394][626795] Updated weights for policy 0, policy_version 164392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:37,236][626795] Updated weights for policy 0, policy_version 164402 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:38,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42461.9, 300 sec: 42820.6). Total num frames: 1346854912. Throughput: 0: 10927.3. Samples: 86711616. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:38,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:39,068][626795] Updated weights for policy 0, policy_version 164412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:40,772][626795] Updated weights for policy 0, policy_version 164422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:42,515][626795] Updated weights for policy 0, policy_version 164432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,967][626772] Signal inference workers to stop experience collection... (1250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,968][626772] Signal inference workers to resume experience collection... (1250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,976][24592] Fps is (10 sec: 46692.2, 60 sec: 42461.5, 300 sec: 43348.4). Total num frames: 1347092480. Throughput: 0: 10949.1. Samples: 86747148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,978][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,982][626795] InferenceWorker_p0-w0: stopping experience collection (1250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:43,989][626795] InferenceWorker_p0-w0: resuming experience collection (1250 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:44,284][626795] Updated weights for policy 0, policy_version 164442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:46,019][626795] Updated weights for policy 0, policy_version 164452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:47,734][626795] Updated weights for policy 0, policy_version 164462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:48,976][24592] Fps is (10 sec: 46692.6, 60 sec: 42461.6, 300 sec: 43403.7). Total num frames: 1347321856. Throughput: 0: 10964.8. Samples: 86817354. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:48,978][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:49,506][626795] Updated weights for policy 0, policy_version 164472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:51,331][626795] Updated weights for policy 0, policy_version 164482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:53,109][626795] Updated weights for policy 0, policy_version 164492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:53,975][24592] Fps is (10 sec: 45877.7, 60 sec: 44373.7, 300 sec: 43403.7). Total num frames: 1347551232. Throughput: 0: 10947.6. Samples: 86886480. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:53,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:54,901][626795] Updated weights for policy 0, policy_version 164502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:56,747][626795] Updated weights for policy 0, policy_version 164512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:35:58,482][626795] Updated weights for policy 0, policy_version 164522 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:58,975][24592] Fps is (10 sec: 45876.7, 60 sec: 44373.3, 300 sec: 43375.9). Total num frames: 1347780608. Throughput: 0: 10930.3. Samples: 86920722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:35:58,977][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:02,793][626795] Updated weights for policy 0, policy_version 164532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:03,975][24592] Fps is (10 sec: 34406.1, 60 sec: 42461.8, 300 sec: 42959.4). Total num frames: 1347895296. Throughput: 0: 10268.7. Samples: 86960586. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:03,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:04,567][626795] Updated weights for policy 0, policy_version 164542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:06,371][626795] Updated weights for policy 0, policy_version 164552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:08,086][626795] Updated weights for policy 0, policy_version 164562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:08,975][24592] Fps is (10 sec: 35225.6, 60 sec: 42461.9, 300 sec: 42987.2). Total num frames: 1348132864. Throughput: 0: 10260.7. Samples: 87029910. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:08,979][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:09,814][626795] Updated weights for policy 0, policy_version 164572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:11,603][626795] Updated weights for policy 0, policy_version 164582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:13,421][626795] Updated weights for policy 0, policy_version 164592 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:13,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42325.3, 300 sec: 42959.4). Total num frames: 1348362240. Throughput: 0: 10871.8. Samples: 87064740. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:13,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:15,049][626795] Updated weights for policy 0, policy_version 164602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:16,899][626795] Updated weights for policy 0, policy_version 164612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:18,675][626795] Updated weights for policy 0, policy_version 164622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:18,976][24592] Fps is (10 sec: 45874.8, 60 sec: 42325.4, 300 sec: 43292.7). Total num frames: 1348591616. Throughput: 0: 10938.5. Samples: 87134988. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:18,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:20,442][626795] Updated weights for policy 0, policy_version 164632 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:21,990][626795] Updated weights for policy 0, policy_version 164642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:23,855][626795] Updated weights for policy 0, policy_version 164652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:23,976][24592] Fps is (10 sec: 46692.5, 60 sec: 42461.6, 300 sec: 43320.3). Total num frames: 1348829184. Throughput: 0: 10968.3. Samples: 87205194. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:23,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:25,694][626795] Updated weights for policy 0, policy_version 164662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:27,414][626795] Updated weights for policy 0, policy_version 164672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:28,975][24592] Fps is (10 sec: 46695.2, 60 sec: 44373.4, 300 sec: 43320.5). Total num frames: 1349058560. Throughput: 0: 10945.1. Samples: 87239670. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:28,977][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:29,144][626795] Updated weights for policy 0, policy_version 164682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:30,916][626795] Updated weights for policy 0, policy_version 164692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:32,728][626795] Updated weights for policy 0, policy_version 164702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:36,282][24592] Fps is (10 sec: 37276.6, 60 sec: 42730.3, 300 sec: 42956.7). Total num frames: 1349287936. Throughput: 0: 10404.8. Samples: 87309570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:36,291][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:37,071][626795] Updated weights for policy 0, policy_version 164712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:38,788][626795] Updated weights for policy 0, policy_version 164722 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:38,975][24592] Fps is (10 sec: 35225.6, 60 sec: 42598.4, 300 sec: 42903.9). Total num frames: 1349410816. Throughput: 0: 10290.9. Samples: 87349572. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:38,976][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:40,550][626795] Updated weights for policy 0, policy_version 164732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:42,354][626795] Updated weights for policy 0, policy_version 164742 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:43,976][24592] Fps is (10 sec: 45784.7, 60 sec: 42461.6, 300 sec: 42876.0). Total num frames: 1349640192. Throughput: 0: 10305.9. Samples: 87384498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:43,978][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:44,011][626795] Updated weights for policy 0, policy_version 164752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:45,804][626795] Updated weights for policy 0, policy_version 164762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:47,591][626795] Updated weights for policy 0, policy_version 164772 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:48,976][24592] Fps is (10 sec: 45873.9, 60 sec: 42461.9, 300 sec: 42876.0). Total num frames: 1349869568. Throughput: 0: 10972.2. Samples: 87454338. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:48,978][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:49,423][626795] Updated weights for policy 0, policy_version 164782 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:51,075][626795] Updated weights for policy 0, policy_version 164792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:52,965][626795] Updated weights for policy 0, policy_version 164802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:53,975][24592] Fps is (10 sec: 46698.6, 60 sec: 42598.4, 300 sec: 43292.6). Total num frames: 1350107136. Throughput: 0: 10977.5. Samples: 87523896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:53,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:54,638][626795] Updated weights for policy 0, policy_version 164812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:56,459][626795] Updated weights for policy 0, policy_version 164822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:58,167][626795] Updated weights for policy 0, policy_version 164832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:58,976][24592] Fps is (10 sec: 46694.7, 60 sec: 42598.3, 300 sec: 43292.6). Total num frames: 1350336512. Throughput: 0: 10985.2. Samples: 87559074. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:36:58,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:36:59,922][626795] Updated weights for policy 0, policy_version 164842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:01,654][626795] Updated weights for policy 0, policy_version 164852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:03,413][626795] Updated weights for policy 0, policy_version 164862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:03,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44646.4, 300 sec: 43320.4). Total num frames: 1350574080. Throughput: 0: 10970.6. Samples: 87628662. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:03,977][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000164865_1350574080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:04,041][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000163604_1340243968.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:05,238][626795] Updated weights for policy 0, policy_version 164872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:07,018][626795] Updated weights for policy 0, policy_version 164882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:11,057][24592] Fps is (10 sec: 36615.4, 60 sec: 42621.6, 300 sec: 42934.1). Total num frames: 1350778880. Throughput: 0: 9732.5. Samples: 87663414. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:11,058][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:11,298][626795] Updated weights for policy 0, policy_version 164892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:13,119][626795] Updated weights for policy 0, policy_version 164902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:13,976][24592] Fps is (10 sec: 34402.8, 60 sec: 42597.7, 300 sec: 42875.9). Total num frames: 1350918144. Throughput: 0: 10303.4. Samples: 87703332. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:13,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:14,851][626795] Updated weights for policy 0, policy_version 164912 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:16,701][626795] Updated weights for policy 0, policy_version 164922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:18,476][626795] Updated weights for policy 0, policy_version 164932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:18,975][24592] Fps is (10 sec: 46555.3, 60 sec: 42598.5, 300 sec: 42876.1). Total num frames: 1351147520. Throughput: 0: 10848.2. Samples: 87772710. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:18,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:20,114][626795] Updated weights for policy 0, policy_version 164942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:21,971][626795] Updated weights for policy 0, policy_version 164952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:23,847][626795] Updated weights for policy 0, policy_version 164962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:23,977][24592] Fps is (10 sec: 45874.7, 60 sec: 42461.4, 300 sec: 42848.2). Total num frames: 1351376896. Throughput: 0: 10927.3. Samples: 87841314. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:23,977][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:25,508][626795] Updated weights for policy 0, policy_version 164972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:27,323][626795] Updated weights for policy 0, policy_version 164982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:28,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42461.9, 300 sec: 43237.1). Total num frames: 1351606272. Throughput: 0: 10933.0. Samples: 87876474. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:28,976][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:29,052][626795] Updated weights for policy 0, policy_version 164992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:30,841][626795] Updated weights for policy 0, policy_version 165002 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:32,681][626795] Updated weights for policy 0, policy_version 165012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:33,975][24592] Fps is (10 sec: 45880.5, 60 sec: 44159.9, 300 sec: 43237.1). Total num frames: 1351835648. Throughput: 0: 10902.7. Samples: 87944958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:33,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:34,484][626795] Updated weights for policy 0, policy_version 165022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:36,313][626795] Updated weights for policy 0, policy_version 165032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:38,169][626795] Updated weights for policy 0, policy_version 165042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:38,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44236.8, 300 sec: 43237.1). Total num frames: 1352065024. Throughput: 0: 10882.1. Samples: 88013592. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:38,976][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:39,821][626795] Updated weights for policy 0, policy_version 165052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:41,660][626795] Updated weights for policy 0, policy_version 165062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:45,833][24592] Fps is (10 sec: 35925.7, 60 sec: 42379.4, 300 sec: 42856.2). Total num frames: 1352261632. Throughput: 0: 10439.8. Samples: 88048254. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:45,834][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:45,894][626795] Updated weights for policy 0, policy_version 165072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:47,678][626795] Updated weights for policy 0, policy_version 165082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:48,976][24592] Fps is (10 sec: 34404.1, 60 sec: 42325.0, 300 sec: 42820.5). Total num frames: 1352409088. Throughput: 0: 10217.8. Samples: 88088472. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:48,980][24592] Avg episode reward: [(0, '5.009')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:49,446][626795] Updated weights for policy 0, policy_version 165092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:51,182][626795] Updated weights for policy 0, policy_version 165102 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:53,004][626795] Updated weights for policy 0, policy_version 165112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:53,975][24592] Fps is (10 sec: 46278.3, 60 sec: 42188.7, 300 sec: 42820.6). Total num frames: 1352638464. Throughput: 0: 11508.5. Samples: 88157340. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:53,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:54,765][626795] Updated weights for policy 0, policy_version 165122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:56,538][626795] Updated weights for policy 0, policy_version 165132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:37:58,317][626795] Updated weights for policy 0, policy_version 165142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:58,976][24592] Fps is (10 sec: 46695.5, 60 sec: 42325.2, 300 sec: 42820.5). Total num frames: 1352876032. Throughput: 0: 10856.7. Samples: 88191876. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:37:58,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:00,112][626795] Updated weights for policy 0, policy_version 165152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:01,785][626795] Updated weights for policy 0, policy_version 165162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:03,544][626795] Updated weights for policy 0, policy_version 165172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:03,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42188.8, 300 sec: 43237.1). Total num frames: 1353105408. Throughput: 0: 10870.9. Samples: 88261902. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:03,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:05,291][626795] Updated weights for policy 0, policy_version 165182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:07,101][626795] Updated weights for policy 0, policy_version 165192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:08,924][626795] Updated weights for policy 0, policy_version 165202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:08,975][24592] Fps is (10 sec: 45877.0, 60 sec: 44129.4, 300 sec: 43237.1). Total num frames: 1353334784. Throughput: 0: 10915.5. Samples: 88332498. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:08,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:10,557][626795] Updated weights for policy 0, policy_version 165212 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:12,434][626795] Updated weights for policy 0, policy_version 165222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:13,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44237.5, 300 sec: 43237.1). Total num frames: 1353572352. Throughput: 0: 10898.1. Samples: 88366890. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:13,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:14,099][626795] Updated weights for policy 0, policy_version 165232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:15,905][626795] Updated weights for policy 0, policy_version 165242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:17,660][626795] Updated weights for policy 0, policy_version 165252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:20,601][24592] Fps is (10 sec: 36641.2, 60 sec: 42405.0, 300 sec: 42834.4). Total num frames: 1353760768. Throughput: 0: 10548.7. Samples: 88436802. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:20,603][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:21,955][626795] Updated weights for policy 0, policy_version 165262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:23,699][626795] Updated weights for policy 0, policy_version 165272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:23,975][24592] Fps is (10 sec: 34406.5, 60 sec: 42326.1, 300 sec: 42820.6). Total num frames: 1353916416. Throughput: 0: 10309.3. Samples: 88477512. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:23,976][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:25,452][626795] Updated weights for policy 0, policy_version 165282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:27,279][626795] Updated weights for policy 0, policy_version 165292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:28,975][24592] Fps is (10 sec: 45977.6, 60 sec: 42325.3, 300 sec: 42820.6). Total num frames: 1354145792. Throughput: 0: 10736.3. Samples: 88511448. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:28,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:29,028][626795] Updated weights for policy 0, policy_version 165302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:30,793][626795] Updated weights for policy 0, policy_version 165312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:32,505][626795] Updated weights for policy 0, policy_version 165322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:33,975][24592] Fps is (10 sec: 45875.4, 60 sec: 42325.3, 300 sec: 42792.9). Total num frames: 1354375168. Throughput: 0: 10948.0. Samples: 88581126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:33,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:34,290][626795] Updated weights for policy 0, policy_version 165332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:36,170][626795] Updated weights for policy 0, policy_version 165342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:37,796][626795] Updated weights for policy 0, policy_version 165352 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:38,976][24592] Fps is (10 sec: 46690.3, 60 sec: 42461.2, 300 sec: 43209.2). Total num frames: 1354612736. Throughput: 0: 10978.3. Samples: 88651374. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:38,981][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:39,626][626795] Updated weights for policy 0, policy_version 165362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:41,295][626795] Updated weights for policy 0, policy_version 165372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:43,122][626795] Updated weights for policy 0, policy_version 165382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:43,976][24592] Fps is (10 sec: 46692.6, 60 sec: 44381.6, 300 sec: 43209.3). Total num frames: 1354842112. Throughput: 0: 10979.8. Samples: 88685964. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:43,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:44,842][626795] Updated weights for policy 0, policy_version 165392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:46,687][626795] Updated weights for policy 0, policy_version 165402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:48,416][626795] Updated weights for policy 0, policy_version 165412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:48,975][24592] Fps is (10 sec: 45879.4, 60 sec: 44373.8, 300 sec: 43181.6). Total num frames: 1355071488. Throughput: 0: 10966.7. Samples: 88755402. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:48,976][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:50,184][626795] Updated weights for policy 0, policy_version 165422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:51,983][626795] Updated weights for policy 0, policy_version 165432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:55,366][24592] Fps is (10 sec: 35960.7, 60 sec: 42567.6, 300 sec: 42813.2). Total num frames: 1355251712. Throughput: 0: 9860.9. Samples: 88789950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:55,368][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:56,261][626795] Updated weights for policy 0, policy_version 165442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:58,025][626795] Updated weights for policy 0, policy_version 165452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:58,975][24592] Fps is (10 sec: 35225.3, 60 sec: 42462.1, 300 sec: 42792.8). Total num frames: 1355423744. Throughput: 0: 10299.5. Samples: 88830366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:38:58,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:38:59,798][626795] Updated weights for policy 0, policy_version 165462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:01,615][626795] Updated weights for policy 0, policy_version 165472 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:03,247][626795] Updated weights for policy 0, policy_version 165482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:03,975][24592] Fps is (10 sec: 46624.2, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1355653120. Throughput: 0: 10674.3. Samples: 88899792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:03,976][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000165485_1355653120.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000164228_1345355776.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:05,209][626795] Updated weights for policy 0, policy_version 165492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:06,918][626795] Updated weights for policy 0, policy_version 165502 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:08,646][626795] Updated weights for policy 0, policy_version 165512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:08,976][24592] Fps is (10 sec: 45874.5, 60 sec: 42461.7, 300 sec: 42765.0). Total num frames: 1355882496. Throughput: 0: 10923.5. Samples: 88969074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:08,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:10,372][626795] Updated weights for policy 0, policy_version 165522 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:12,220][626795] Updated weights for policy 0, policy_version 165532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:13,874][626795] Updated weights for policy 0, policy_version 165542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:13,976][24592] Fps is (10 sec: 46689.8, 60 sec: 42461.2, 300 sec: 43209.2). Total num frames: 1356120064. Throughput: 0: 10949.6. Samples: 89004192. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:13,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:15,744][626795] Updated weights for policy 0, policy_version 165552 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:17,360][626795] Updated weights for policy 0, policy_version 165562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:18,976][24592] Fps is (10 sec: 47512.9, 60 sec: 44486.3, 300 sec: 43237.0). Total num frames: 1356357632. Throughput: 0: 10956.7. Samples: 89074182. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:18,977][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:19,156][626795] Updated weights for policy 0, policy_version 165572 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:20,911][626795] Updated weights for policy 0, policy_version 165582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:22,580][626795] Updated weights for policy 0, policy_version 165592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:23,975][24592] Fps is (10 sec: 46699.0, 60 sec: 44509.9, 300 sec: 43209.4). Total num frames: 1356587008. Throughput: 0: 10963.8. Samples: 89144736. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:23,977][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:24,391][626795] Updated weights for policy 0, policy_version 165602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:26,223][626795] Updated weights for policy 0, policy_version 165612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:30,218][24592] Fps is (10 sec: 34975.6, 60 sec: 42536.4, 300 sec: 42779.4). Total num frames: 1356750848. Throughput: 0: 10668.0. Samples: 89179278. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:30,219][24592] Avg episode reward: [(0, '5.025')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:30,641][626795] Updated weights for policy 0, policy_version 165622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:32,271][626795] Updated weights for policy 0, policy_version 165632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:33,975][24592] Fps is (10 sec: 34406.3, 60 sec: 42598.3, 300 sec: 42792.8). Total num frames: 1356931072. Throughput: 0: 10297.1. Samples: 89218770. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:33,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:34,173][626795] Updated weights for policy 0, policy_version 165642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:35,906][626795] Updated weights for policy 0, policy_version 165652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:37,645][626795] Updated weights for policy 0, policy_version 165662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:38,976][24592] Fps is (10 sec: 46771.7, 60 sec: 42462.2, 300 sec: 42765.0). Total num frames: 1357160448. Throughput: 0: 11425.1. Samples: 89288196. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:38,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:39,376][626795] Updated weights for policy 0, policy_version 165672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:41,163][626795] Updated weights for policy 0, policy_version 165682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:42,924][626795] Updated weights for policy 0, policy_version 165692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:43,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42598.7, 300 sec: 42792.8). Total num frames: 1357398016. Throughput: 0: 10934.7. Samples: 89322426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:43,976][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:44,681][626795] Updated weights for policy 0, policy_version 165702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:46,431][626795] Updated weights for policy 0, policy_version 165712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:48,061][626795] Updated weights for policy 0, policy_version 165722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:48,975][24592] Fps is (10 sec: 46696.2, 60 sec: 42598.4, 300 sec: 43181.6). Total num frames: 1357627392. Throughput: 0: 10963.2. Samples: 89393136. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:48,976][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:49,893][626795] Updated weights for policy 0, policy_version 165732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:51,700][626795] Updated weights for policy 0, policy_version 165742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:53,440][626795] Updated weights for policy 0, policy_version 165752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:53,975][24592] Fps is (10 sec: 46694.0, 60 sec: 44587.5, 300 sec: 43209.3). Total num frames: 1357864960. Throughput: 0: 10971.5. Samples: 89462790. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:53,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:55,228][626795] Updated weights for policy 0, policy_version 165762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:56,966][626795] Updated weights for policy 0, policy_version 165772 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:39:58,805][626795] Updated weights for policy 0, policy_version 165782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:58,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44509.9, 300 sec: 43209.3). Total num frames: 1358094336. Throughput: 0: 10972.8. Samples: 89497956. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:39:58,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:00,480][626795] Updated weights for policy 0, policy_version 165792 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:02,281][626795] Updated weights for policy 0, policy_version 165802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:05,032][24592] Fps is (10 sec: 34823.9, 60 sec: 42532.3, 300 sec: 42778.5). Total num frames: 1358249984. Throughput: 0: 9939.7. Samples: 89531964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:05,033][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:06,683][626795] Updated weights for policy 0, policy_version 165812 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:08,481][626795] Updated weights for policy 0, policy_version 165822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:08,975][24592] Fps is (10 sec: 33587.3, 60 sec: 42462.0, 300 sec: 42737.2). Total num frames: 1358430208. Throughput: 0: 10254.0. Samples: 89606166. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:08,976][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:10,243][626795] Updated weights for policy 0, policy_version 165832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:12,056][626795] Updated weights for policy 0, policy_version 165842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:13,774][626795] Updated weights for policy 0, policy_version 165852 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:13,976][24592] Fps is (10 sec: 46712.5, 60 sec: 42462.4, 300 sec: 42765.0). Total num frames: 1358667776. Throughput: 0: 10550.7. Samples: 89640948. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:13,994][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:15,629][626795] Updated weights for policy 0, policy_version 165862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:17,246][626795] Updated weights for policy 0, policy_version 165872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:18,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42325.6, 300 sec: 42765.0). Total num frames: 1358897152. Throughput: 0: 10926.1. Samples: 89710446. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:18,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:19,097][626795] Updated weights for policy 0, policy_version 165882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:20,776][626795] Updated weights for policy 0, policy_version 165892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:22,497][626795] Updated weights for policy 0, policy_version 165902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:23,975][24592] Fps is (10 sec: 46695.7, 60 sec: 42461.9, 300 sec: 43181.6). Total num frames: 1359134720. Throughput: 0: 10953.2. Samples: 89781084. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:23,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:24,307][626795] Updated weights for policy 0, policy_version 165912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:26,108][626795] Updated weights for policy 0, policy_version 165922 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:27,787][626795] Updated weights for policy 0, policy_version 165932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:28,975][24592] Fps is (10 sec: 46694.7, 60 sec: 44475.5, 300 sec: 43181.6). Total num frames: 1359364096. Throughput: 0: 10962.1. Samples: 89815722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:28,977][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:29,653][626795] Updated weights for policy 0, policy_version 165942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:31,381][626795] Updated weights for policy 0, policy_version 165952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:33,146][626795] Updated weights for policy 0, policy_version 165962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:33,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44373.4, 300 sec: 43181.6). Total num frames: 1359593472. Throughput: 0: 10926.1. Samples: 89884812. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:33,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:34,948][626795] Updated weights for policy 0, policy_version 165972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:36,764][626795] Updated weights for policy 0, policy_version 165982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:39,863][24592] Fps is (10 sec: 34611.7, 60 sec: 42381.5, 300 sec: 42747.6). Total num frames: 1359740928. Throughput: 0: 9948.4. Samples: 89919294. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:39,864][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:41,041][626795] Updated weights for policy 0, policy_version 165992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:42,877][626795] Updated weights for policy 0, policy_version 166002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:43,975][24592] Fps is (10 sec: 34406.1, 60 sec: 42325.3, 300 sec: 42765.1). Total num frames: 1359937536. Throughput: 0: 10238.1. Samples: 89958672. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:43,979][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:44,656][626795] Updated weights for policy 0, policy_version 166012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:46,369][626795] Updated weights for policy 0, policy_version 166022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:48,131][626795] Updated weights for policy 0, policy_version 166032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:48,975][24592] Fps is (10 sec: 46746.5, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1360166912. Throughput: 0: 11293.2. Samples: 90028230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:48,976][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:49,968][626795] Updated weights for policy 0, policy_version 166042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:51,708][626795] Updated weights for policy 0, policy_version 166052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:53,359][626795] Updated weights for policy 0, policy_version 166062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:53,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42325.4, 300 sec: 42792.8). Total num frames: 1360404480. Throughput: 0: 10946.3. Samples: 90098748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:53,977][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:55,112][626795] Updated weights for policy 0, policy_version 166072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:56,824][626795] Updated weights for policy 0, policy_version 166082 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:40:58,599][626795] Updated weights for policy 0, policy_version 166092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:58,975][24592] Fps is (10 sec: 47513.1, 60 sec: 42461.8, 300 sec: 43209.3). Total num frames: 1360642048. Throughput: 0: 10965.4. Samples: 90134388. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:40:58,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:00,327][626795] Updated weights for policy 0, policy_version 166102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:02,039][626795] Updated weights for policy 0, policy_version 166112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:03,837][626795] Updated weights for policy 0, policy_version 166122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:03,975][24592] Fps is (10 sec: 47513.1, 60 sec: 44612.6, 300 sec: 43209.3). Total num frames: 1360879616. Throughput: 0: 10986.6. Samples: 90204846. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:03,976][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000166123_1360879616.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:04,041][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000164865_1350574080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:05,703][626795] Updated weights for policy 0, policy_version 166132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:07,421][626795] Updated weights for policy 0, policy_version 166142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:08,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44646.3, 300 sec: 43209.3). Total num frames: 1361108992. Throughput: 0: 10968.1. Samples: 90274650. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:08,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:09,054][626795] Updated weights for policy 0, policy_version 166152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:10,961][626795] Updated weights for policy 0, policy_version 166162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:14,779][24592] Fps is (10 sec: 34122.1, 60 sec: 42439.7, 300 sec: 42787.3). Total num frames: 1361248256. Throughput: 0: 10763.2. Samples: 90308718. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:14,780][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:15,416][626795] Updated weights for policy 0, policy_version 166172 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:17,154][626795] Updated weights for policy 0, policy_version 166182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:18,953][626795] Updated weights for policy 0, policy_version 166192 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:18,976][24592] Fps is (10 sec: 33585.7, 60 sec: 42461.5, 300 sec: 42765.0). Total num frames: 1361444864. Throughput: 0: 10278.8. Samples: 90347364. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:18,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:20,755][626795] Updated weights for policy 0, policy_version 166202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:22,490][626795] Updated weights for policy 0, policy_version 166212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:23,975][24592] Fps is (10 sec: 46321.3, 60 sec: 42325.4, 300 sec: 42765.0). Total num frames: 1361674240. Throughput: 0: 11254.9. Samples: 90415776. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:23,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:24,270][626795] Updated weights for policy 0, policy_version 166222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:25,985][626795] Updated weights for policy 0, policy_version 166232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:27,745][626795] Updated weights for policy 0, policy_version 166242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:28,975][24592] Fps is (10 sec: 46697.1, 60 sec: 42461.9, 300 sec: 43130.1). Total num frames: 1361911808. Throughput: 0: 10957.4. Samples: 90451752. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:28,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:29,411][626795] Updated weights for policy 0, policy_version 166252 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:31,134][626795] Updated weights for policy 0, policy_version 166262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:32,912][626795] Updated weights for policy 0, policy_version 166272 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:33,976][24592] Fps is (10 sec: 47512.4, 60 sec: 42598.2, 300 sec: 43181.5). Total num frames: 1362149376. Throughput: 0: 11008.2. Samples: 90523602. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:33,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:34,467][626795] Updated weights for policy 0, policy_version 166282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:36,275][626795] Updated weights for policy 0, policy_version 166292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:38,037][626795] Updated weights for policy 0, policy_version 166302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:38,975][24592] Fps is (10 sec: 47513.2, 60 sec: 44762.2, 300 sec: 43209.5). Total num frames: 1362386944. Throughput: 0: 11010.3. Samples: 90594210. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:38,977][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:39,721][626795] Updated weights for policy 0, policy_version 166312 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:41,594][626795] Updated weights for policy 0, policy_version 166322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:43,437][626795] Updated weights for policy 0, policy_version 166332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:43,975][24592] Fps is (10 sec: 46695.5, 60 sec: 44646.5, 300 sec: 43209.4). Total num frames: 1362616320. Throughput: 0: 10973.8. Samples: 90628206. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:43,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:45,218][626795] Updated weights for policy 0, policy_version 166342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:46,980][626795] Updated weights for policy 0, policy_version 166352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:49,760][24592] Fps is (10 sec: 34183.0, 60 sec: 42587.8, 300 sec: 42762.4). Total num frames: 1362755584. Throughput: 0: 10005.1. Samples: 90662922. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:49,761][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:51,533][626795] Updated weights for policy 0, policy_version 166362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:53,327][626795] Updated weights for policy 0, policy_version 166372 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:53,976][24592] Fps is (10 sec: 32764.8, 60 sec: 42324.7, 300 sec: 42737.1). Total num frames: 1362944000. Throughput: 0: 10216.5. Samples: 90734400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:53,978][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:55,120][626795] Updated weights for policy 0, policy_version 166382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:56,948][626795] Updated weights for policy 0, policy_version 166392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:41:58,795][626795] Updated weights for policy 0, policy_version 166402 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:58,975][24592] Fps is (10 sec: 45334.5, 60 sec: 42188.8, 300 sec: 42709.5). Total num frames: 1363173376. Throughput: 0: 10395.8. Samples: 90768174. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:41:58,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:00,425][626795] Updated weights for policy 0, policy_version 166412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:02,132][626795] Updated weights for policy 0, policy_version 166422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:03,811][626795] Updated weights for policy 0, policy_version 166432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:03,975][24592] Fps is (10 sec: 46699.0, 60 sec: 42188.9, 300 sec: 43124.9). Total num frames: 1363410944. Throughput: 0: 10930.9. Samples: 90839250. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:03,976][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:05,601][626795] Updated weights for policy 0, policy_version 166442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:07,208][626772] Signal inference workers to stop experience collection... (1300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:07,209][626772] Signal inference workers to resume experience collection... (1300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:07,220][626795] InferenceWorker_p0-w0: stopping experience collection (1300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:07,224][626795] InferenceWorker_p0-w0: resuming experience collection (1300 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:07,252][626795] Updated weights for policy 0, policy_version 166452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:08,976][24592] Fps is (10 sec: 47512.6, 60 sec: 42325.2, 300 sec: 43153.9). Total num frames: 1363648512. Throughput: 0: 10997.1. Samples: 90910650. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:08,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:09,014][626795] Updated weights for policy 0, policy_version 166462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:10,722][626795] Updated weights for policy 0, policy_version 166472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:12,403][626795] Updated weights for policy 0, policy_version 166482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:13,976][24592] Fps is (10 sec: 47511.3, 60 sec: 44560.3, 300 sec: 43181.5). Total num frames: 1363886080. Throughput: 0: 10995.7. Samples: 90946566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:13,978][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:14,150][626795] Updated weights for policy 0, policy_version 166492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:15,909][626795] Updated weights for policy 0, policy_version 166502 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:17,559][626795] Updated weights for policy 0, policy_version 166512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:18,975][24592] Fps is (10 sec: 47515.2, 60 sec: 44646.8, 300 sec: 43209.5). Total num frames: 1364123648. Throughput: 0: 10964.1. Samples: 91016982. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:18,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:19,461][626795] Updated weights for policy 0, policy_version 166522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:21,280][626795] Updated weights for policy 0, policy_version 166532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:24,750][24592] Fps is (10 sec: 34975.9, 60 sec: 42594.7, 300 sec: 42791.5). Total num frames: 1364262912. Throughput: 0: 9991.8. Samples: 91051578. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:24,751][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:25,812][626795] Updated weights for policy 0, policy_version 166542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:27,579][626795] Updated weights for policy 0, policy_version 166552 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:28,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1364451328. Throughput: 0: 10223.9. Samples: 91088280. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:28,978][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:29,485][626795] Updated weights for policy 0, policy_version 166562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:31,147][626795] Updated weights for policy 0, policy_version 166572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:33,070][626795] Updated weights for policy 0, policy_version 166582 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:33,975][24592] Fps is (10 sec: 45287.2, 60 sec: 42189.0, 300 sec: 42765.0). Total num frames: 1364680704. Throughput: 0: 11150.9. Samples: 91155966. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:33,976][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:34,858][626795] Updated weights for policy 0, policy_version 166592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:36,530][626795] Updated weights for policy 0, policy_version 166602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:38,202][626795] Updated weights for policy 0, policy_version 166612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:38,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42188.8, 300 sec: 43175.7). Total num frames: 1364918272. Throughput: 0: 10944.8. Samples: 91226904. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:38,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:39,873][626795] Updated weights for policy 0, policy_version 166622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:41,667][626795] Updated weights for policy 0, policy_version 166632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:43,420][626795] Updated weights for policy 0, policy_version 166642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:43,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.4, 300 sec: 43209.4). Total num frames: 1365155840. Throughput: 0: 10994.7. Samples: 91262934. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:43,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:45,025][626795] Updated weights for policy 0, policy_version 166652 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:46,824][626795] Updated weights for policy 0, policy_version 166662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:48,517][626795] Updated weights for policy 0, policy_version 166672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:48,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44546.1, 300 sec: 43237.1). Total num frames: 1365393408. Throughput: 0: 10997.6. Samples: 91334142. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:48,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:50,155][626795] Updated weights for policy 0, policy_version 166682 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:51,983][626795] Updated weights for policy 0, policy_version 166692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:53,811][626795] Updated weights for policy 0, policy_version 166702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:53,977][24592] Fps is (10 sec: 46685.2, 60 sec: 44645.7, 300 sec: 43209.1). Total num frames: 1365622784. Throughput: 0: 10976.9. Samples: 91404630. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:53,978][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:42:55,570][626795] Updated weights for policy 0, policy_version 166712 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:59,750][24592] Fps is (10 sec: 34212.8, 60 sec: 42594.5, 300 sec: 42791.5). Total num frames: 1365762048. Throughput: 0: 10757.4. Samples: 91438980. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:42:59,751][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:00,135][626795] Updated weights for policy 0, policy_version 166722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:01,964][626795] Updated weights for policy 0, policy_version 166732 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:03,714][626795] Updated weights for policy 0, policy_version 166742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:03,975][24592] Fps is (10 sec: 32774.2, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1365950464. Throughput: 0: 10199.3. Samples: 91475952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:03,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:04,003][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000166743_1365958656.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000165485_1355653120.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:05,615][626795] Updated weights for policy 0, policy_version 166752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:07,498][626795] Updated weights for policy 0, policy_version 166762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:08,975][24592] Fps is (10 sec: 45288.2, 60 sec: 42189.0, 300 sec: 42737.2). Total num frames: 1366179840. Throughput: 0: 11112.0. Samples: 91543014. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:08,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:09,222][626795] Updated weights for policy 0, policy_version 166772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:10,880][626795] Updated weights for policy 0, policy_version 166782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:12,650][626795] Updated weights for policy 0, policy_version 166792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42325.6, 300 sec: 43169.6). Total num frames: 1366425600. Throughput: 0: 10902.7. Samples: 91578900. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:13,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:14,384][626795] Updated weights for policy 0, policy_version 166802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:16,064][626795] Updated weights for policy 0, policy_version 166812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:17,770][626795] Updated weights for policy 0, policy_version 166822 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:18,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42188.7, 300 sec: 43181.5). Total num frames: 1366654976. Throughput: 0: 10989.4. Samples: 91650492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:18,976][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:19,583][626795] Updated weights for policy 0, policy_version 166832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:21,199][626795] Updated weights for policy 0, policy_version 166842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:22,960][626795] Updated weights for policy 0, policy_version 166852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:23,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44538.7, 300 sec: 43237.1). Total num frames: 1366900736. Throughput: 0: 11009.1. Samples: 91722312. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:23,976][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:24,684][626795] Updated weights for policy 0, policy_version 166862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:26,533][626795] Updated weights for policy 0, policy_version 166872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:28,089][626795] Updated weights for policy 0, policy_version 166882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:28,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44646.3, 300 sec: 43237.1). Total num frames: 1367130112. Throughput: 0: 10983.6. Samples: 91757196. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:28,977][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:29,891][626795] Updated weights for policy 0, policy_version 166892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:31,661][626795] Updated weights for policy 0, policy_version 166902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:34,766][24592] Fps is (10 sec: 34162.5, 60 sec: 42583.2, 300 sec: 42789.3). Total num frames: 1367269376. Throughput: 0: 10004.2. Samples: 91792242. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:34,768][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:36,298][626795] Updated weights for policy 0, policy_version 166912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:38,049][626795] Updated weights for policy 0, policy_version 166922 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:38,975][24592] Fps is (10 sec: 33587.6, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1367465984. Throughput: 0: 10192.2. Samples: 91863258. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:38,978][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:39,946][626795] Updated weights for policy 0, policy_version 166932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:41,700][626795] Updated weights for policy 0, policy_version 166942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:43,587][626795] Updated weights for policy 0, policy_version 166952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:43,975][24592] Fps is (10 sec: 45366.2, 60 sec: 42188.7, 300 sec: 42765.0). Total num frames: 1367687168. Throughput: 0: 10360.5. Samples: 91897176. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:43,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:45,263][626795] Updated weights for policy 0, policy_version 166962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:46,902][626795] Updated weights for policy 0, policy_version 166972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:48,702][626795] Updated weights for policy 0, policy_version 166982 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:48,984][24592] Fps is (10 sec: 45835.5, 60 sec: 42182.7, 300 sec: 43161.6). Total num frames: 1367924736. Throughput: 0: 10912.2. Samples: 91967094. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:48,985][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:50,401][626795] Updated weights for policy 0, policy_version 166992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:52,060][626795] Updated weights for policy 0, policy_version 167002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:53,928][626795] Updated weights for policy 0, policy_version 167012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:53,975][24592] Fps is (10 sec: 47514.2, 60 sec: 42326.7, 300 sec: 43181.6). Total num frames: 1368162304. Throughput: 0: 11010.0. Samples: 92038464. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:53,976][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:55,604][626795] Updated weights for policy 0, policy_version 167022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:57,283][626795] Updated weights for policy 0, policy_version 167032 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:58,975][24592] Fps is (10 sec: 47554.7, 60 sec: 44538.9, 300 sec: 43209.3). Total num frames: 1368399872. Throughput: 0: 11001.3. Samples: 92073960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:43:58,979][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:43:59,006][626795] Updated weights for policy 0, policy_version 167042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:00,789][626795] Updated weights for policy 0, policy_version 167052 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:02,545][626795] Updated weights for policy 0, policy_version 167062 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:03,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44783.0, 300 sec: 43237.1). Total num frames: 1368637440. Throughput: 0: 10999.4. Samples: 92145462. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:03,979][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:04,262][626795] Updated weights for policy 0, policy_version 167072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:05,982][626795] Updated weights for policy 0, policy_version 167082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:09,834][24592] Fps is (10 sec: 34704.5, 60 sec: 42670.7, 300 sec: 42779.5). Total num frames: 1368776704. Throughput: 0: 9992.2. Samples: 92180538. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:09,835][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:10,610][626795] Updated weights for policy 0, policy_version 167092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:12,561][626795] Updated weights for policy 0, policy_version 167102 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:13,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42188.8, 300 sec: 42709.5). Total num frames: 1368956928. Throughput: 0: 10199.3. Samples: 92216166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:13,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:14,374][626795] Updated weights for policy 0, policy_version 167112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:16,155][626795] Updated weights for policy 0, policy_version 167122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:18,046][626795] Updated weights for policy 0, policy_version 167132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:18,975][24592] Fps is (10 sec: 44805.9, 60 sec: 42188.9, 300 sec: 42709.5). Total num frames: 1369186304. Throughput: 0: 11092.5. Samples: 92282634. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:18,976][24592] Avg episode reward: [(0, '4.902')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:19,830][626795] Updated weights for policy 0, policy_version 167142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:21,450][626795] Updated weights for policy 0, policy_version 167152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:23,170][626795] Updated weights for policy 0, policy_version 167162 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:23,976][24592] Fps is (10 sec: 46689.4, 60 sec: 42051.5, 300 sec: 43141.0). Total num frames: 1369423872. Throughput: 0: 10905.3. Samples: 92354010. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:23,978][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:24,917][626795] Updated weights for policy 0, policy_version 167172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:26,574][626795] Updated weights for policy 0, policy_version 167182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:28,215][626795] Updated weights for policy 0, policy_version 167192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:28,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42188.9, 300 sec: 43153.8). Total num frames: 1369661440. Throughput: 0: 10961.8. Samples: 92390454. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:28,976][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:30,034][626795] Updated weights for policy 0, policy_version 167202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:31,643][626795] Updated weights for policy 0, policy_version 167212 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:33,451][626795] Updated weights for policy 0, policy_version 167222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:33,976][24592] Fps is (10 sec: 47518.2, 60 sec: 44412.4, 300 sec: 43181.6). Total num frames: 1369899008. Throughput: 0: 10996.2. Samples: 92461830. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:33,980][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:35,256][626795] Updated weights for policy 0, policy_version 167232 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:36,958][626795] Updated weights for policy 0, policy_version 167242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:38,629][626795] Updated weights for policy 0, policy_version 167252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:38,976][24592] Fps is (10 sec: 47510.9, 60 sec: 44509.5, 300 sec: 43181.5). Total num frames: 1370136576. Throughput: 0: 10980.0. Samples: 92532570. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:38,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:40,508][626795] Updated weights for policy 0, policy_version 167262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:44,896][24592] Fps is (10 sec: 35256.2, 60 sec: 42626.9, 300 sec: 42770.3). Total num frames: 1370284032. Throughput: 0: 10753.1. Samples: 92567754. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:44,898][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:44,984][626795] Updated weights for policy 0, policy_version 167272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:46,847][626795] Updated weights for policy 0, policy_version 167282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:48,670][626795] Updated weights for policy 0, policy_version 167292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:48,975][24592] Fps is (10 sec: 32769.5, 60 sec: 42331.4, 300 sec: 42709.5). Total num frames: 1370464256. Throughput: 0: 10164.0. Samples: 92602842. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:48,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:50,591][626795] Updated weights for policy 0, policy_version 167302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:52,387][626795] Updated weights for policy 0, policy_version 167312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:53,975][24592] Fps is (10 sec: 44212.5, 60 sec: 42052.3, 300 sec: 42681.7). Total num frames: 1370685440. Throughput: 0: 11099.6. Samples: 92670492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:53,976][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:54,196][626795] Updated weights for policy 0, policy_version 167322 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:55,889][626795] Updated weights for policy 0, policy_version 167332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:57,524][626795] Updated weights for policy 0, policy_version 167342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:58,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42188.8, 300 sec: 43141.7). Total num frames: 1370931200. Throughput: 0: 10883.9. Samples: 92705940. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:44:58,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:44:59,330][626795] Updated weights for policy 0, policy_version 167352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:01,070][626795] Updated weights for policy 0, policy_version 167362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:02,687][626795] Updated weights for policy 0, policy_version 167372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:03,977][24592] Fps is (10 sec: 48324.3, 60 sec: 42187.6, 300 sec: 43181.3). Total num frames: 1371168768. Throughput: 0: 11000.9. Samples: 92777694. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:03,978][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000167379_1371168768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000166123_1360879616.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:04,521][626795] Updated weights for policy 0, policy_version 167382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:06,221][626795] Updated weights for policy 0, policy_version 167392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:07,928][626795] Updated weights for policy 0, policy_version 167402 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:08,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44324.8, 300 sec: 43153.8). Total num frames: 1371398144. Throughput: 0: 10982.5. Samples: 92848212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:08,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:09,727][626795] Updated weights for policy 0, policy_version 167412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:11,400][626795] Updated weights for policy 0, policy_version 167422 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:13,167][626795] Updated weights for policy 0, policy_version 167432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:13,975][24592] Fps is (10 sec: 46702.2, 60 sec: 44646.4, 300 sec: 43181.5). Total num frames: 1371635712. Throughput: 0: 10963.0. Samples: 92883792. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:13,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:14,914][626795] Updated weights for policy 0, policy_version 167442 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:16,601][626795] Updated weights for policy 0, policy_version 167452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:20,038][24592] Fps is (10 sec: 35543.6, 60 sec: 42661.8, 300 sec: 42749.8). Total num frames: 1371791360. Throughput: 0: 9926.2. Samples: 92919060. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:20,040][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:21,453][626795] Updated weights for policy 0, policy_version 167462 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:23,247][626795] Updated weights for policy 0, policy_version 167472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:23,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42189.6, 300 sec: 42681.7). Total num frames: 1371955200. Throughput: 0: 10108.0. Samples: 92987424. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:23,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:25,096][626795] Updated weights for policy 0, policy_version 167482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:26,982][626795] Updated weights for policy 0, policy_version 167492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:28,816][626795] Updated weights for policy 0, policy_version 167502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:28,975][24592] Fps is (10 sec: 43998.2, 60 sec: 42052.3, 300 sec: 42681.7). Total num frames: 1372184576. Throughput: 0: 10278.8. Samples: 93020832. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:28,977][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:30,406][626795] Updated weights for policy 0, policy_version 167512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:32,191][626795] Updated weights for policy 0, policy_version 167522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:33,950][626795] Updated weights for policy 0, policy_version 167532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:33,976][24592] Fps is (10 sec: 46691.0, 60 sec: 42051.8, 300 sec: 43116.8). Total num frames: 1372422144. Throughput: 0: 10837.6. Samples: 93090540. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:33,978][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:35,784][626795] Updated weights for policy 0, policy_version 167542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:37,509][626795] Updated weights for policy 0, policy_version 167552 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:38,976][24592] Fps is (10 sec: 46693.9, 60 sec: 41916.0, 300 sec: 43098.2). Total num frames: 1372651520. Throughput: 0: 10904.9. Samples: 93161214. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:38,977][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:39,261][626795] Updated weights for policy 0, policy_version 167562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:40,893][626795] Updated weights for policy 0, policy_version 167572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:42,643][626795] Updated weights for policy 0, policy_version 167582 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:43,976][24592] Fps is (10 sec: 47516.1, 60 sec: 44232.9, 300 sec: 43153.8). Total num frames: 1372897280. Throughput: 0: 10900.6. Samples: 93196470. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:43,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:44,397][626795] Updated weights for policy 0, policy_version 167592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:46,145][626795] Updated weights for policy 0, policy_version 167602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:47,787][626795] Updated weights for policy 0, policy_version 167612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:48,976][24592] Fps is (10 sec: 47513.3, 60 sec: 44373.3, 300 sec: 43126.0). Total num frames: 1373126656. Throughput: 0: 10893.7. Samples: 93267894. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:48,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:49,617][626795] Updated weights for policy 0, policy_version 167622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:50,858][626772] Signal inference workers to stop experience collection... (1350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:50,859][626772] Signal inference workers to resume experience collection... (1350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:50,865][626795] InferenceWorker_p0-w0: stopping experience collection (1350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:50,874][626795] InferenceWorker_p0-w0: resuming experience collection (1350 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:51,330][626795] Updated weights for policy 0, policy_version 167632 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:54,949][24592] Fps is (10 sec: 35086.1, 60 sec: 42589.7, 300 sec: 42707.3). Total num frames: 1373282304. Throughput: 0: 9896.2. Samples: 93303180. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:54,950][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:55,862][626795] Updated weights for policy 0, policy_version 167642 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:57,664][626795] Updated weights for policy 0, policy_version 167652 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:58,975][24592] Fps is (10 sec: 33587.8, 60 sec: 42188.8, 300 sec: 42654.0). Total num frames: 1373462528. Throughput: 0: 10158.7. Samples: 93340932. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:45:58,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:45:59,558][626795] Updated weights for policy 0, policy_version 167662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:01,345][626795] Updated weights for policy 0, policy_version 167672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:03,159][626795] Updated weights for policy 0, policy_version 167682 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:03,981][24592] Fps is (10 sec: 44442.5, 60 sec: 41912.7, 300 sec: 42625.3). Total num frames: 1373683712. Throughput: 0: 11117.4. Samples: 93407592. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:03,983][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:04,918][626795] Updated weights for policy 0, policy_version 167692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:06,628][626795] Updated weights for policy 0, policy_version 167702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:08,316][626795] Updated weights for policy 0, policy_version 167712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:08,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42052.3, 300 sec: 43076.8). Total num frames: 1373921280. Throughput: 0: 10911.1. Samples: 93478422. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:08,976][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:10,128][626795] Updated weights for policy 0, policy_version 167722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:11,798][626795] Updated weights for policy 0, policy_version 167732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:13,530][626795] Updated weights for policy 0, policy_version 167742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:13,976][24592] Fps is (10 sec: 47539.7, 60 sec: 42051.9, 300 sec: 43098.2). Total num frames: 1374158848. Throughput: 0: 10955.2. Samples: 93513822. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:13,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:15,318][626795] Updated weights for policy 0, policy_version 167752 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:17,011][626795] Updated weights for policy 0, policy_version 167762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:18,716][626795] Updated weights for policy 0, policy_version 167772 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:18,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44200.6, 300 sec: 43126.0). Total num frames: 1374396416. Throughput: 0: 10989.0. Samples: 93585036. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:18,976][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:20,490][626795] Updated weights for policy 0, policy_version 167782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:22,188][626795] Updated weights for policy 0, policy_version 167792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:23,972][626795] Updated weights for policy 0, policy_version 167802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:23,975][24592] Fps is (10 sec: 47516.2, 60 sec: 44646.4, 300 sec: 43126.0). Total num frames: 1374633984. Throughput: 0: 10985.2. Samples: 93655548. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:23,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:25,701][626795] Updated weights for policy 0, policy_version 167812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:30,072][24592] Fps is (10 sec: 34696.1, 60 sec: 42503.9, 300 sec: 42661.9). Total num frames: 1374781440. Throughput: 0: 10720.5. Samples: 93690648. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:30,074][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:30,426][626795] Updated weights for policy 0, policy_version 167822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:32,257][626795] Updated weights for policy 0, policy_version 167832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:33,975][24592] Fps is (10 sec: 31948.4, 60 sec: 42189.3, 300 sec: 42598.4). Total num frames: 1374953472. Throughput: 0: 10147.1. Samples: 93724512. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:33,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:34,130][626795] Updated weights for policy 0, policy_version 167842 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:36,032][626795] Updated weights for policy 0, policy_version 167852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:37,783][626795] Updated weights for policy 0, policy_version 167862 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:38,975][24592] Fps is (10 sec: 44166.6, 60 sec: 42052.3, 300 sec: 42570.6). Total num frames: 1375174656. Throughput: 0: 11111.8. Samples: 93792390. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:38,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:39,529][626795] Updated weights for policy 0, policy_version 167872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:41,329][626795] Updated weights for policy 0, policy_version 167882 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:42,982][626795] Updated weights for policy 0, policy_version 167892 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:43,976][24592] Fps is (10 sec: 45875.3, 60 sec: 41915.8, 300 sec: 43018.2). Total num frames: 1375412224. Throughput: 0: 10809.8. Samples: 93827376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:43,977][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:44,756][626795] Updated weights for policy 0, policy_version 167902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:46,472][626795] Updated weights for policy 0, policy_version 167912 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:48,236][626795] Updated weights for policy 0, policy_version 167922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:48,980][24592] Fps is (10 sec: 47489.6, 60 sec: 42048.8, 300 sec: 43069.9). Total num frames: 1375649792. Throughput: 0: 10908.4. Samples: 93898458. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:48,982][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:49,927][626795] Updated weights for policy 0, policy_version 167932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:51,693][626795] Updated weights for policy 0, policy_version 167942 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:53,346][626795] Updated weights for policy 0, policy_version 167952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:53,976][24592] Fps is (10 sec: 47512.0, 60 sec: 44133.7, 300 sec: 43098.2). Total num frames: 1375887360. Throughput: 0: 10913.5. Samples: 93969534. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:53,980][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:55,154][626795] Updated weights for policy 0, policy_version 167962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:56,882][626795] Updated weights for policy 0, policy_version 167972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:46:58,807][626795] Updated weights for policy 0, policy_version 167982 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:58,975][24592] Fps is (10 sec: 46718.2, 60 sec: 44236.8, 300 sec: 43070.5). Total num frames: 1376116736. Throughput: 0: 10898.8. Samples: 94004262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:46:58,977][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:00,567][626795] Updated weights for policy 0, policy_version 167992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:05,035][24592] Fps is (10 sec: 34075.4, 60 sec: 42266.1, 300 sec: 42612.1). Total num frames: 1376264192. Throughput: 0: 9839.3. Samples: 94038228. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:05,036][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:05,041][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000168001_1376264192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:05,110][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000166743_1365958656.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:05,172][626795] Updated weights for policy 0, policy_version 168002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:06,961][626795] Updated weights for policy 0, policy_version 168012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:08,788][626795] Updated weights for policy 0, policy_version 168022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:08,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42052.3, 300 sec: 42570.7). Total num frames: 1376444416. Throughput: 0: 10074.1. Samples: 94108884. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:08,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:10,520][626795] Updated weights for policy 0, policy_version 168032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:12,473][626795] Updated weights for policy 0, policy_version 168042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:13,975][24592] Fps is (10 sec: 44896.5, 60 sec: 41779.6, 300 sec: 42515.1). Total num frames: 1376665600. Throughput: 0: 10285.1. Samples: 94142196. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:13,976][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:14,073][626795] Updated weights for policy 0, policy_version 168052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:15,876][626795] Updated weights for policy 0, policy_version 168062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:17,744][626795] Updated weights for policy 0, policy_version 168072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:18,975][24592] Fps is (10 sec: 45055.8, 60 sec: 41642.6, 300 sec: 42933.3). Total num frames: 1376894976. Throughput: 0: 10815.4. Samples: 94211202. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:18,976][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:19,554][626795] Updated weights for policy 0, policy_version 168082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:21,232][626795] Updated weights for policy 0, policy_version 168092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:23,107][626795] Updated weights for policy 0, policy_version 168102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:23,976][24592] Fps is (10 sec: 45872.8, 60 sec: 41505.8, 300 sec: 42959.3). Total num frames: 1377124352. Throughput: 0: 10822.7. Samples: 94279416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:23,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:24,922][626795] Updated weights for policy 0, policy_version 168112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:26,737][626795] Updated weights for policy 0, policy_version 168122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:28,411][626795] Updated weights for policy 0, policy_version 168132 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:28,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43669.9, 300 sec: 42959.4). Total num frames: 1377353728. Throughput: 0: 10813.6. Samples: 94313988. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:28,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:30,234][626795] Updated weights for policy 0, policy_version 168142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:31,970][626795] Updated weights for policy 0, policy_version 168152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:33,861][626795] Updated weights for policy 0, policy_version 168162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:33,975][24592] Fps is (10 sec: 46696.6, 60 sec: 43963.8, 300 sec: 42959.4). Total num frames: 1377591296. Throughput: 0: 10766.5. Samples: 94382898. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:33,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:35,495][626795] Updated weights for policy 0, policy_version 168172 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:39,361][24592] Fps is (10 sec: 37073.4, 60 sec: 42462.2, 300 sec: 42598.3). Total num frames: 1377738752. Throughput: 0: 9883.3. Samples: 94418088. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:39,362][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:39,517][626795] Updated weights for policy 0, policy_version 168182 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:41,189][626795] Updated weights for policy 0, policy_version 168192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:43,070][626795] Updated weights for policy 0, policy_version 168202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:43,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42188.9, 300 sec: 42542.9). Total num frames: 1377943552. Throughput: 0: 10185.3. Samples: 94462602. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:43,976][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:44,847][626795] Updated weights for policy 0, policy_version 168212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:46,598][626795] Updated weights for policy 0, policy_version 168222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:48,436][626795] Updated weights for policy 0, policy_version 168232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:48,975][24592] Fps is (10 sec: 45158.3, 60 sec: 42055.8, 300 sec: 42543.1). Total num frames: 1378172928. Throughput: 0: 11219.8. Samples: 94531236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:48,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:50,184][626795] Updated weights for policy 0, policy_version 168242 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:51,963][626795] Updated weights for policy 0, policy_version 168252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:53,832][626795] Updated weights for policy 0, policy_version 168262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42052.6, 300 sec: 42989.0). Total num frames: 1378410496. Throughput: 0: 10921.6. Samples: 94600356. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:53,976][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:55,412][626795] Updated weights for policy 0, policy_version 168272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:57,251][626795] Updated weights for policy 0, policy_version 168282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:58,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42052.3, 300 sec: 43015.0). Total num frames: 1378639872. Throughput: 0: 10942.8. Samples: 94634622. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:47:58,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:47:59,015][626795] Updated weights for policy 0, policy_version 168292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:00,824][626795] Updated weights for policy 0, policy_version 168302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:02,547][626795] Updated weights for policy 0, policy_version 168312 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:03,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44197.8, 300 sec: 43015.0). Total num frames: 1378869248. Throughput: 0: 10961.6. Samples: 94704474. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:03,978][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:04,374][626795] Updated weights for policy 0, policy_version 168322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:06,170][626795] Updated weights for policy 0, policy_version 168332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:07,976][626795] Updated weights for policy 0, policy_version 168342 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:08,976][24592] Fps is (10 sec: 46692.2, 60 sec: 44373.0, 300 sec: 42987.1). Total num frames: 1379106816. Throughput: 0: 10984.5. Samples: 94773720. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:08,976][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:09,660][626795] Updated weights for policy 0, policy_version 168352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:11,556][626795] Updated weights for policy 0, policy_version 168362 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:14,019][24592] Fps is (10 sec: 35074.3, 60 sec: 42567.8, 300 sec: 42592.2). Total num frames: 1379221504. Throughput: 0: 10206.5. Samples: 94773720. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:14,020][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:15,745][626795] Updated weights for policy 0, policy_version 168372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:17,382][626795] Updated weights for policy 0, policy_version 168382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:18,975][24592] Fps is (10 sec: 34408.0, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1379450880. Throughput: 0: 10369.3. Samples: 94849518. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:18,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:19,245][626795] Updated weights for policy 0, policy_version 168392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:20,978][626795] Updated weights for policy 0, policy_version 168402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:22,826][626795] Updated weights for policy 0, policy_version 168412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:23,975][24592] Fps is (10 sec: 46074.1, 60 sec: 42598.8, 300 sec: 42542.9). Total num frames: 1379680256. Throughput: 0: 11211.5. Samples: 94918284. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:23,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:24,554][626795] Updated weights for policy 0, policy_version 168422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:26,336][626795] Updated weights for policy 0, policy_version 168432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:28,132][626795] Updated weights for policy 0, policy_version 168442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42734.9, 300 sec: 42991.3). Total num frames: 1379917824. Throughput: 0: 10902.5. Samples: 94953216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:28,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:29,835][626795] Updated weights for policy 0, policy_version 168452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:31,536][626795] Updated weights for policy 0, policy_version 168462 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:33,359][626795] Updated weights for policy 0, policy_version 168472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:33,976][24592] Fps is (10 sec: 46690.6, 60 sec: 42597.8, 300 sec: 42987.1). Total num frames: 1380147200. Throughput: 0: 10947.0. Samples: 95023860. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:33,978][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:35,052][626795] Updated weights for policy 0, policy_version 168482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:36,860][626795] Updated weights for policy 0, policy_version 168492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:38,593][626795] Updated weights for policy 0, policy_version 168502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:38,977][24592] Fps is (10 sec: 45868.9, 60 sec: 44247.0, 300 sec: 43014.8). Total num frames: 1380376576. Throughput: 0: 10947.8. Samples: 95093022. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:38,978][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:40,446][626795] Updated weights for policy 0, policy_version 168512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:42,209][626795] Updated weights for policy 0, policy_version 168522 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:43,932][626795] Updated weights for policy 0, policy_version 168532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:43,976][24592] Fps is (10 sec: 46696.8, 60 sec: 44509.6, 300 sec: 43016.2). Total num frames: 1380614144. Throughput: 0: 10960.7. Samples: 95127858. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:43,978][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:45,602][626795] Updated weights for policy 0, policy_version 168542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:48,975][24592] Fps is (10 sec: 35230.2, 60 sec: 42598.3, 300 sec: 42598.4). Total num frames: 1380728832. Throughput: 0: 10609.3. Samples: 95181894. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:48,976][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:50,107][626795] Updated weights for policy 0, policy_version 168552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:51,874][626795] Updated weights for policy 0, policy_version 168562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:53,728][626795] Updated weights for policy 0, policy_version 168572 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:53,975][24592] Fps is (10 sec: 33588.2, 60 sec: 42325.3, 300 sec: 42542.9). Total num frames: 1380950016. Throughput: 0: 10274.2. Samples: 95236056. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:53,976][24592] Avg episode reward: [(0, '4.320')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:55,442][626795] Updated weights for policy 0, policy_version 168582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:57,177][626795] Updated weights for policy 0, policy_version 168592 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:48:58,815][626795] Updated weights for policy 0, policy_version 168602 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:58,975][24592] Fps is (10 sec: 45875.7, 60 sec: 42461.9, 300 sec: 42542.9). Total num frames: 1381187584. Throughput: 0: 11057.5. Samples: 95270832. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:48:58,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:00,662][626795] Updated weights for policy 0, policy_version 168612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:02,410][626795] Updated weights for policy 0, policy_version 168622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:03,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42598.4, 300 sec: 43001.2). Total num frames: 1381425152. Throughput: 0: 10949.9. Samples: 95342262. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:03,976][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000168631_1381425152.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000167379_1371168768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:04,087][626795] Updated weights for policy 0, policy_version 168632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:05,822][626795] Updated weights for policy 0, policy_version 168642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:07,480][626795] Updated weights for policy 0, policy_version 168652 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42598.7, 300 sec: 43070.5). Total num frames: 1381662720. Throughput: 0: 10998.7. Samples: 95413224. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:08,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:09,293][626795] Updated weights for policy 0, policy_version 168662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:10,934][626795] Updated weights for policy 0, policy_version 168672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:12,703][626795] Updated weights for policy 0, policy_version 168682 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:13,976][24592] Fps is (10 sec: 47512.8, 60 sec: 44678.4, 300 sec: 43098.2). Total num frames: 1381900288. Throughput: 0: 11012.4. Samples: 95448774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:13,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:14,505][626795] Updated weights for policy 0, policy_version 168692 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:16,306][626795] Updated weights for policy 0, policy_version 168702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:18,037][626795] Updated weights for policy 0, policy_version 168712 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:18,976][24592] Fps is (10 sec: 46693.5, 60 sec: 44646.2, 300 sec: 43070.6). Total num frames: 1382129664. Throughput: 0: 10987.5. Samples: 95518290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:18,979][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:19,893][626795] Updated weights for policy 0, policy_version 168722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:23,976][24592] Fps is (10 sec: 32767.9, 60 sec: 42461.7, 300 sec: 42598.4). Total num frames: 1382227968. Throughput: 0: 10355.1. Samples: 95558988. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:23,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:24,448][626795] Updated weights for policy 0, policy_version 168732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:26,276][626795] Updated weights for policy 0, policy_version 168742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:28,147][626795] Updated weights for policy 0, policy_version 168752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:28,975][24592] Fps is (10 sec: 31949.3, 60 sec: 42188.8, 300 sec: 42542.9). Total num frames: 1382449152. Throughput: 0: 10236.6. Samples: 95588502. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:28,977][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:29,955][626795] Updated weights for policy 0, policy_version 168762 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:31,592][626795] Updated weights for policy 0, policy_version 168772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:33,380][626795] Updated weights for policy 0, policy_version 168782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:33,975][24592] Fps is (10 sec: 45876.4, 60 sec: 42325.9, 300 sec: 42542.9). Total num frames: 1382686720. Throughput: 0: 10577.8. Samples: 95657892. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:33,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:35,000][626795] Updated weights for policy 0, policy_version 168792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:36,785][626795] Updated weights for policy 0, policy_version 168802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:38,467][626795] Updated weights for policy 0, policy_version 168812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:38,975][24592] Fps is (10 sec: 48333.5, 60 sec: 42599.5, 300 sec: 43010.4). Total num frames: 1382932480. Throughput: 0: 10970.6. Samples: 95729730. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:38,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:40,248][626795] Updated weights for policy 0, policy_version 168822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:41,945][626795] Updated weights for policy 0, policy_version 168832 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:43,665][626795] Updated weights for policy 0, policy_version 168842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:43,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42598.6, 300 sec: 43070.5). Total num frames: 1383170048. Throughput: 0: 10986.3. Samples: 95765214. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:43,976][24592] Avg episode reward: [(0, '4.939')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:45,321][626795] Updated weights for policy 0, policy_version 168852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:47,240][626795] Updated weights for policy 0, policy_version 168862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:48,814][626795] Updated weights for policy 0, policy_version 168872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:48,975][24592] Fps is (10 sec: 46693.7, 60 sec: 44509.9, 300 sec: 43098.2). Total num frames: 1383399424. Throughput: 0: 10968.0. Samples: 95835822. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:48,977][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:50,669][626795] Updated weights for policy 0, policy_version 168882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:52,483][626795] Updated weights for policy 0, policy_version 168892 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:53,975][24592] Fps is (10 sec: 45874.8, 60 sec: 44646.3, 300 sec: 43042.7). Total num frames: 1383628800. Throughput: 0: 10935.8. Samples: 95905338. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:53,978][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:54,290][626795] Updated weights for policy 0, policy_version 168902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:49:56,035][626795] Updated weights for policy 0, policy_version 168912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:58,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42325.3, 300 sec: 42570.9). Total num frames: 1383727104. Throughput: 0: 10793.6. Samples: 95934486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:49:58,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:00,701][626795] Updated weights for policy 0, policy_version 168922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:02,474][626795] Updated weights for policy 0, policy_version 168932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:03,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42188.7, 300 sec: 42570.6). Total num frames: 1383956480. Throughput: 0: 10153.4. Samples: 95975190. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:03,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:04,401][626795] Updated weights for policy 0, policy_version 168942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:06,087][626795] Updated weights for policy 0, policy_version 168952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:07,821][626795] Updated weights for policy 0, policy_version 168962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:08,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42052.3, 300 sec: 42542.9). Total num frames: 1384185856. Throughput: 0: 10782.3. Samples: 96044190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:08,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:09,657][626795] Updated weights for policy 0, policy_version 168972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:11,443][626795] Updated weights for policy 0, policy_version 168982 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:13,213][626795] Updated weights for policy 0, policy_version 168992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:13,975][24592] Fps is (10 sec: 45875.6, 60 sec: 41915.8, 300 sec: 42947.5). Total num frames: 1384415232. Throughput: 0: 10889.2. Samples: 96078516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:13,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:14,956][626795] Updated weights for policy 0, policy_version 169002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:16,773][626795] Updated weights for policy 0, policy_version 169012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:18,552][626795] Updated weights for policy 0, policy_version 169022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:18,975][24592] Fps is (10 sec: 45874.7, 60 sec: 41915.8, 300 sec: 43014.9). Total num frames: 1384644608. Throughput: 0: 10876.2. Samples: 96147324. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:18,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:20,260][626795] Updated weights for policy 0, policy_version 169032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:22,114][626795] Updated weights for policy 0, policy_version 169042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:23,839][626795] Updated weights for policy 0, policy_version 169052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:23,975][24592] Fps is (10 sec: 45875.1, 60 sec: 44100.4, 300 sec: 43014.9). Total num frames: 1384873984. Throughput: 0: 10830.4. Samples: 96217098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:23,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:25,605][626795] Updated weights for policy 0, policy_version 169062 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:27,336][626795] Updated weights for policy 0, policy_version 169072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:28,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44373.3, 300 sec: 43015.1). Total num frames: 1385111552. Throughput: 0: 10811.3. Samples: 96251724. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:28,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:29,124][626795] Updated weights for policy 0, policy_version 169082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:30,973][626795] Updated weights for policy 0, policy_version 169092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:33,975][24592] Fps is (10 sec: 36045.1, 60 sec: 42461.9, 300 sec: 42654.0). Total num frames: 1385234432. Throughput: 0: 10440.0. Samples: 96305622. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:33,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:34,910][626795] Updated weights for policy 0, policy_version 169102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:36,699][626795] Updated weights for policy 0, policy_version 169112 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:38,525][626795] Updated weights for policy 0, policy_version 169122 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:38,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42188.7, 300 sec: 42598.4). Total num frames: 1385463808. Throughput: 0: 10195.8. Samples: 96364146. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:38,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:40,249][626795] Updated weights for policy 0, policy_version 169132 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:42,020][626795] Updated weights for policy 0, policy_version 169142 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:43,868][626795] Updated weights for policy 0, policy_version 169152 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:43,976][24592] Fps is (10 sec: 45873.9, 60 sec: 42052.1, 300 sec: 42598.4). Total num frames: 1385693184. Throughput: 0: 10319.4. Samples: 96398862. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:43,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:45,548][626795] Updated weights for policy 0, policy_version 169162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:47,292][626795] Updated weights for policy 0, policy_version 169172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:48,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42188.9, 300 sec: 43018.1). Total num frames: 1385930752. Throughput: 0: 10965.9. Samples: 96468654. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:48,976][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:49,196][626795] Updated weights for policy 0, policy_version 169182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:50,927][626795] Updated weights for policy 0, policy_version 169192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:52,785][626795] Updated weights for policy 0, policy_version 169202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:53,976][24592] Fps is (10 sec: 45875.0, 60 sec: 42052.1, 300 sec: 43014.9). Total num frames: 1386151936. Throughput: 0: 10915.7. Samples: 96535398. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:53,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:54,732][626795] Updated weights for policy 0, policy_version 169212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:56,461][626795] Updated weights for policy 0, policy_version 169222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:58,267][626795] Updated weights for policy 0, policy_version 169232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:58,975][24592] Fps is (10 sec: 44236.7, 60 sec: 44100.3, 300 sec: 43015.8). Total num frames: 1386373120. Throughput: 0: 10922.0. Samples: 96570006. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:50:58,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:50:59,990][626795] Updated weights for policy 0, policy_version 169242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:01,838][626795] Updated weights for policy 0, policy_version 169252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:03,593][626795] Updated weights for policy 0, policy_version 169262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:03,975][24592] Fps is (10 sec: 45876.8, 60 sec: 44236.9, 300 sec: 43014.9). Total num frames: 1386610688. Throughput: 0: 10914.4. Samples: 96638472. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:03,976][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000169264_1386610688.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:04,026][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000168001_1376264192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:05,362][626795] Updated weights for policy 0, policy_version 169272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:08,975][24592] Fps is (10 sec: 35225.4, 60 sec: 42325.3, 300 sec: 42598.5). Total num frames: 1386725376. Throughput: 0: 10251.9. Samples: 96678432. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:08,978][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:09,645][626795] Updated weights for policy 0, policy_version 169282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:11,446][626795] Updated weights for policy 0, policy_version 169292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:13,220][626795] Updated weights for policy 0, policy_version 169302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:13,975][24592] Fps is (10 sec: 34406.0, 60 sec: 42325.3, 300 sec: 42570.6). Total num frames: 1386954752. Throughput: 0: 10258.9. Samples: 96713376. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:13,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:15,019][626795] Updated weights for policy 0, policy_version 169312 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:16,718][626795] Updated weights for policy 0, policy_version 169322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:18,543][626795] Updated weights for policy 0, policy_version 169332 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42325.4, 300 sec: 42542.9). Total num frames: 1387184128. Throughput: 0: 10595.6. Samples: 96782424. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:18,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:20,261][626795] Updated weights for policy 0, policy_version 169342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:22,137][626795] Updated weights for policy 0, policy_version 169352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:23,929][626795] Updated weights for policy 0, policy_version 169362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:23,975][24592] Fps is (10 sec: 45875.7, 60 sec: 42325.4, 300 sec: 42980.4). Total num frames: 1387413504. Throughput: 0: 10822.7. Samples: 96851166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:23,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:25,647][626795] Updated weights for policy 0, policy_version 169372 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:27,393][626795] Updated weights for policy 0, policy_version 169382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:28,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42188.8, 300 sec: 43015.0). Total num frames: 1387642880. Throughput: 0: 10824.9. Samples: 96885978. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:28,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:29,234][626795] Updated weights for policy 0, policy_version 169392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:31,036][626795] Updated weights for policy 0, policy_version 169402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:32,763][626795] Updated weights for policy 0, policy_version 169412 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:33,975][24592] Fps is (10 sec: 45875.1, 60 sec: 43963.7, 300 sec: 43042.7). Total num frames: 1387872256. Throughput: 0: 10799.9. Samples: 96954648. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:33,980][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:34,598][626795] Updated weights for policy 0, policy_version 169422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:36,360][626795] Updated weights for policy 0, policy_version 169432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:38,129][626795] Updated weights for policy 0, policy_version 169442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:38,975][24592] Fps is (10 sec: 45875.0, 60 sec: 43963.7, 300 sec: 43015.0). Total num frames: 1388101632. Throughput: 0: 10856.7. Samples: 97023948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:38,976][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:39,960][626795] Updated weights for policy 0, policy_version 169452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:43,975][24592] Fps is (10 sec: 34406.0, 60 sec: 42052.4, 300 sec: 42599.1). Total num frames: 1388216320. Throughput: 0: 10567.4. Samples: 97045542. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:43,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:44,255][626795] Updated weights for policy 0, policy_version 169462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:46,024][626795] Updated weights for policy 0, policy_version 169472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:47,821][626795] Updated weights for policy 0, policy_version 169482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:48,976][24592] Fps is (10 sec: 34405.9, 60 sec: 41915.6, 300 sec: 42570.7). Total num frames: 1388445696. Throughput: 0: 10223.8. Samples: 97098546. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:48,976][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:49,627][626795] Updated weights for policy 0, policy_version 169492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:51,280][626795] Updated weights for policy 0, policy_version 169502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:53,171][626795] Updated weights for policy 0, policy_version 169512 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:53,975][24592] Fps is (10 sec: 46695.0, 60 sec: 42189.0, 300 sec: 42598.4). Total num frames: 1388683264. Throughput: 0: 10852.5. Samples: 97166796. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:53,976][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:54,938][626795] Updated weights for policy 0, policy_version 169522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:56,690][626795] Updated weights for policy 0, policy_version 169532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:51:58,474][626795] Updated weights for policy 0, policy_version 169542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:58,975][24592] Fps is (10 sec: 45876.0, 60 sec: 42188.8, 300 sec: 43002.7). Total num frames: 1388904448. Throughput: 0: 10852.0. Samples: 97201716. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:51:58,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:00,361][626795] Updated weights for policy 0, policy_version 169552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:02,149][626795] Updated weights for policy 0, policy_version 169562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:03,822][626795] Updated weights for policy 0, policy_version 169572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 42052.2, 300 sec: 43014.9). Total num frames: 1389133824. Throughput: 0: 10832.8. Samples: 97269900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:03,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:05,602][626795] Updated weights for policy 0, policy_version 169582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:07,350][626795] Updated weights for policy 0, policy_version 169592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:08,975][24592] Fps is (10 sec: 46694.0, 60 sec: 44100.3, 300 sec: 43070.5). Total num frames: 1389371392. Throughput: 0: 10864.6. Samples: 97340076. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:08,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:09,097][626795] Updated weights for policy 0, policy_version 169602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:10,892][626795] Updated weights for policy 0, policy_version 169612 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:12,681][626795] Updated weights for policy 0, policy_version 169622 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:13,976][24592] Fps is (10 sec: 46694.1, 60 sec: 44100.2, 300 sec: 43070.5). Total num frames: 1389600768. Throughput: 0: 10851.7. Samples: 97374306. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:13,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:14,575][626795] Updated weights for policy 0, policy_version 169632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:18,729][626795] Updated weights for policy 0, policy_version 169642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:18,975][24592] Fps is (10 sec: 33587.1, 60 sec: 42052.2, 300 sec: 42654.0). Total num frames: 1389707264. Throughput: 0: 10250.5. Samples: 97415922. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:18,976][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:20,517][626795] Updated weights for policy 0, policy_version 169652 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:22,372][626795] Updated weights for policy 0, policy_version 169662 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:23,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42188.7, 300 sec: 42681.7). Total num frames: 1389944832. Throughput: 0: 10220.3. Samples: 97483860. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:23,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:24,040][626795] Updated weights for policy 0, policy_version 169672 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:25,899][626795] Updated weights for policy 0, policy_version 169682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:27,654][626795] Updated weights for policy 0, policy_version 169692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:28,976][24592] Fps is (10 sec: 46692.6, 60 sec: 42188.4, 300 sec: 42653.9). Total num frames: 1390174208. Throughput: 0: 10505.1. Samples: 97518276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:28,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:29,312][626795] Updated weights for policy 0, policy_version 169702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:31,174][626795] Updated weights for policy 0, policy_version 169712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:33,022][626795] Updated weights for policy 0, policy_version 169722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:33,976][24592] Fps is (10 sec: 45873.7, 60 sec: 42188.5, 300 sec: 42987.7). Total num frames: 1390403584. Throughput: 0: 10867.0. Samples: 97587564. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:33,978][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:34,803][626795] Updated weights for policy 0, policy_version 169732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:36,528][626795] Updated weights for policy 0, policy_version 169742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:38,316][626795] Updated weights for policy 0, policy_version 169752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:38,976][24592] Fps is (10 sec: 45876.0, 60 sec: 42188.6, 300 sec: 43014.9). Total num frames: 1390632960. Throughput: 0: 10885.8. Samples: 97656660. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:38,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:40,070][626795] Updated weights for policy 0, policy_version 169762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:41,897][626795] Updated weights for policy 0, policy_version 169772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:43,689][626795] Updated weights for policy 0, policy_version 169782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:43,975][24592] Fps is (10 sec: 46695.8, 60 sec: 44236.8, 300 sec: 43042.7). Total num frames: 1390870528. Throughput: 0: 10866.9. Samples: 97690728. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:43,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:45,434][626795] Updated weights for policy 0, policy_version 169792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:47,147][626795] Updated weights for policy 0, policy_version 169802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:48,933][626795] Updated weights for policy 0, policy_version 169812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:48,976][24592] Fps is (10 sec: 46693.4, 60 sec: 44236.5, 300 sec: 43014.9). Total num frames: 1391099904. Throughput: 0: 10889.5. Samples: 97759932. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:48,978][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:53,208][626795] Updated weights for policy 0, policy_version 169822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:53,975][24592] Fps is (10 sec: 34406.7, 60 sec: 42188.8, 300 sec: 42626.2). Total num frames: 1391214592. Throughput: 0: 10244.0. Samples: 97801056. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:53,977][24592] Avg episode reward: [(0, '4.963')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:55,081][626795] Updated weights for policy 0, policy_version 169832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:56,857][626795] Updated weights for policy 0, policy_version 169842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:52:58,490][626795] Updated weights for policy 0, policy_version 169852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:58,975][24592] Fps is (10 sec: 33589.0, 60 sec: 42188.8, 300 sec: 42598.4). Total num frames: 1391435776. Throughput: 0: 10240.2. Samples: 97835112. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:52:58,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:00,374][626795] Updated weights for policy 0, policy_version 169862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:02,116][626795] Updated weights for policy 0, policy_version 169872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:03,932][626795] Updated weights for policy 0, policy_version 169882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:03,977][24592] Fps is (10 sec: 45867.5, 60 sec: 42324.2, 300 sec: 42598.2). Total num frames: 1391673344. Throughput: 0: 10858.7. Samples: 97904580. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:03,979][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000169882_1391673344.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000168631_1381425152.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:05,773][626795] Updated weights for policy 0, policy_version 169892 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:07,534][626795] Updated weights for policy 0, policy_version 169902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:08,976][24592] Fps is (10 sec: 45874.1, 60 sec: 42052.2, 300 sec: 42965.7). Total num frames: 1391894528. Throughput: 0: 10862.9. Samples: 97972692. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:08,976][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:09,332][626795] Updated weights for policy 0, policy_version 169912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:11,145][626795] Updated weights for policy 0, policy_version 169922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:12,897][626795] Updated weights for policy 0, policy_version 169932 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:13,976][24592] Fps is (10 sec: 45062.4, 60 sec: 42052.2, 300 sec: 42959.4). Total num frames: 1392123904. Throughput: 0: 10865.5. Samples: 98007222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:13,978][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:14,692][626795] Updated weights for policy 0, policy_version 169942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:16,496][626795] Updated weights for policy 0, policy_version 169952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:18,349][626795] Updated weights for policy 0, policy_version 169962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:18,976][24592] Fps is (10 sec: 45873.7, 60 sec: 44099.9, 300 sec: 42959.3). Total num frames: 1392353280. Throughput: 0: 10842.5. Samples: 98075478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:18,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:20,056][626795] Updated weights for policy 0, policy_version 169972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:21,801][626795] Updated weights for policy 0, policy_version 169982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:23,629][626795] Updated weights for policy 0, policy_version 169992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:23,976][24592] Fps is (10 sec: 45872.1, 60 sec: 43963.1, 300 sec: 42931.5). Total num frames: 1392582656. Throughput: 0: 10841.4. Samples: 98144532. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:23,979][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:28,003][626795] Updated weights for policy 0, policy_version 170002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:28,975][24592] Fps is (10 sec: 35227.0, 60 sec: 42189.0, 300 sec: 42570.7). Total num frames: 1392705536. Throughput: 0: 10373.2. Samples: 98157522. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:28,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:29,615][626795] Updated weights for policy 0, policy_version 170012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:31,456][626795] Updated weights for policy 0, policy_version 170022 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:33,228][626795] Updated weights for policy 0, policy_version 170032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:33,975][24592] Fps is (10 sec: 35228.5, 60 sec: 42189.0, 300 sec: 42570.8). Total num frames: 1392934912. Throughput: 0: 10211.3. Samples: 98219436. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:33,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:34,970][626795] Updated weights for policy 0, policy_version 170042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:36,838][626795] Updated weights for policy 0, policy_version 170052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:38,481][626795] Updated weights for policy 0, policy_version 170062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:38,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42189.0, 300 sec: 42542.9). Total num frames: 1393164288. Throughput: 0: 10844.0. Samples: 98289036. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:38,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:40,310][626795] Updated weights for policy 0, policy_version 170072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:42,053][626795] Updated weights for policy 0, policy_version 170082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:43,906][626795] Updated weights for policy 0, policy_version 170092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:43,976][24592] Fps is (10 sec: 45872.0, 60 sec: 42051.8, 300 sec: 42931.5). Total num frames: 1393393664. Throughput: 0: 10851.4. Samples: 98323434. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:43,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:45,597][626795] Updated weights for policy 0, policy_version 170102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:47,449][626795] Updated weights for policy 0, policy_version 170112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:48,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42052.6, 300 sec: 42959.4). Total num frames: 1393623040. Throughput: 0: 10845.2. Samples: 98392596. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:48,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:49,169][626795] Updated weights for policy 0, policy_version 170122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:50,849][626795] Updated weights for policy 0, policy_version 170132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:52,601][626795] Updated weights for policy 0, policy_version 170142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:53,975][24592] Fps is (10 sec: 46698.2, 60 sec: 44100.3, 300 sec: 42959.4). Total num frames: 1393860608. Throughput: 0: 10872.6. Samples: 98461956. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:53,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:54,522][626795] Updated weights for policy 0, policy_version 170152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:56,307][626795] Updated weights for policy 0, policy_version 170162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:57,076][626772] Signal inference workers to stop experience collection... (1400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:57,080][626772] Signal inference workers to resume experience collection... (1400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:57,084][626795] InferenceWorker_p0-w0: stopping experience collection (1400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:57,089][626795] InferenceWorker_p0-w0: resuming experience collection (1400 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:53:58,104][626795] Updated weights for policy 0, policy_version 170172 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:58,976][24592] Fps is (10 sec: 46694.2, 60 sec: 44236.7, 300 sec: 42931.6). Total num frames: 1394089984. Throughput: 0: 10876.2. Samples: 98496648. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:53:58,978][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:02,324][626795] Updated weights for policy 0, policy_version 170182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:03,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42190.0, 300 sec: 42515.1). Total num frames: 1394204672. Throughput: 0: 10255.5. Samples: 98536968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:03,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:04,143][626795] Updated weights for policy 0, policy_version 170192 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:05,879][626795] Updated weights for policy 0, policy_version 170202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:07,721][626795] Updated weights for policy 0, policy_version 170212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:08,975][24592] Fps is (10 sec: 34406.6, 60 sec: 42325.5, 300 sec: 42487.3). Total num frames: 1394434048. Throughput: 0: 10261.4. Samples: 98606286. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:08,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:09,372][626795] Updated weights for policy 0, policy_version 170222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:11,199][626795] Updated weights for policy 0, policy_version 170232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:12,953][626795] Updated weights for policy 0, policy_version 170242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:13,975][24592] Fps is (10 sec: 45874.2, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1394663424. Throughput: 0: 10729.1. Samples: 98640330. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:13,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:14,807][626795] Updated weights for policy 0, policy_version 170252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:16,542][626795] Updated weights for policy 0, policy_version 170262 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:18,289][626795] Updated weights for policy 0, policy_version 170272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:18,975][24592] Fps is (10 sec: 45875.0, 60 sec: 42325.7, 300 sec: 42931.7). Total num frames: 1394892800. Throughput: 0: 10910.0. Samples: 98710386. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:18,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:20,113][626795] Updated weights for policy 0, policy_version 170282 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:21,831][626795] Updated weights for policy 0, policy_version 170292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:23,635][626795] Updated weights for policy 0, policy_version 170302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:23,975][24592] Fps is (10 sec: 46695.1, 60 sec: 42462.5, 300 sec: 42987.2). Total num frames: 1395130368. Throughput: 0: 10904.5. Samples: 98779740. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:23,977][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:25,334][626795] Updated weights for policy 0, policy_version 170312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:27,172][626795] Updated weights for policy 0, policy_version 170322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:28,901][626795] Updated weights for policy 0, policy_version 170332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:28,976][24592] Fps is (10 sec: 46692.1, 60 sec: 44236.5, 300 sec: 42959.3). Total num frames: 1395359744. Throughput: 0: 10919.1. Samples: 98814792. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:28,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:30,681][626795] Updated weights for policy 0, policy_version 170342 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:32,492][626795] Updated weights for policy 0, policy_version 170352 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:33,976][24592] Fps is (10 sec: 46693.3, 60 sec: 44373.2, 300 sec: 42931.6). Total num frames: 1395597312. Throughput: 0: 10934.3. Samples: 98884644. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:33,976][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:34,175][626795] Updated weights for policy 0, policy_version 170362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:38,477][626795] Updated weights for policy 0, policy_version 170372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:38,975][24592] Fps is (10 sec: 35227.5, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1395712000. Throughput: 0: 10289.7. Samples: 98924994. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:38,976][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:40,192][626795] Updated weights for policy 0, policy_version 170382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:41,973][626795] Updated weights for policy 0, policy_version 170392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:43,766][626795] Updated weights for policy 0, policy_version 170402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:43,976][24592] Fps is (10 sec: 34405.8, 60 sec: 42462.1, 300 sec: 42515.0). Total num frames: 1395941376. Throughput: 0: 10289.2. Samples: 98959668. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:43,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:45,617][626795] Updated weights for policy 0, policy_version 170412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:47,339][626795] Updated weights for policy 0, policy_version 170422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:48,967][626795] Updated weights for policy 0, policy_version 170432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:48,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1396178944. Throughput: 0: 10931.9. Samples: 99028902. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:48,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:50,755][626795] Updated weights for policy 0, policy_version 170442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:52,598][626795] Updated weights for policy 0, policy_version 170452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:53,975][24592] Fps is (10 sec: 45876.7, 60 sec: 42325.3, 300 sec: 42959.4). Total num frames: 1396400128. Throughput: 0: 10946.1. Samples: 99098862. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:53,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:54,307][626795] Updated weights for policy 0, policy_version 170462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:56,084][626795] Updated weights for policy 0, policy_version 170472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:57,836][626795] Updated weights for policy 0, policy_version 170482 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:58,976][24592] Fps is (10 sec: 45874.3, 60 sec: 42461.8, 300 sec: 42987.2). Total num frames: 1396637696. Throughput: 0: 10970.3. Samples: 99133992. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:54:58,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:54:59,574][626795] Updated weights for policy 0, policy_version 170492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:01,448][626795] Updated weights for policy 0, policy_version 170502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:03,142][626795] Updated weights for policy 0, policy_version 170512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:03,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44373.3, 300 sec: 42987.2). Total num frames: 1396867072. Throughput: 0: 10957.3. Samples: 99203466. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:03,976][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000170516_1396867072.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000169264_1386610688.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:05,004][626795] Updated weights for policy 0, policy_version 170522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:06,736][626795] Updated weights for policy 0, policy_version 170532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:08,468][626795] Updated weights for policy 0, policy_version 170542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:11,372][24592] Fps is (10 sec: 36347.0, 60 sec: 42537.9, 300 sec: 42613.3). Total num frames: 1397088256. Throughput: 0: 10400.3. Samples: 99272676. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:11,373][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:12,773][626795] Updated weights for policy 0, policy_version 170552 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:13,975][24592] Fps is (10 sec: 34406.3, 60 sec: 42461.9, 300 sec: 42598.4). Total num frames: 1397211136. Throughput: 0: 10301.4. Samples: 99278352. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:13,981][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:14,582][626795] Updated weights for policy 0, policy_version 170562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:16,240][626795] Updated weights for policy 0, policy_version 170572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:18,068][626795] Updated weights for policy 0, policy_version 170582 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:18,975][24592] Fps is (10 sec: 47404.1, 60 sec: 42598.4, 300 sec: 42626.2). Total num frames: 1397448704. Throughput: 0: 10305.9. Samples: 99348408. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:18,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:19,823][626795] Updated weights for policy 0, policy_version 170592 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:21,567][626795] Updated weights for policy 0, policy_version 170602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:23,286][626795] Updated weights for policy 0, policy_version 170612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:23,976][24592] Fps is (10 sec: 47512.6, 60 sec: 42598.2, 300 sec: 42626.1). Total num frames: 1397686272. Throughput: 0: 10960.2. Samples: 99418206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:23,977][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:25,051][626795] Updated weights for policy 0, policy_version 170622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:26,710][626795] Updated weights for policy 0, policy_version 170632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:28,470][626795] Updated weights for policy 0, policy_version 170642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:28,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42598.8, 300 sec: 42987.2). Total num frames: 1397915648. Throughput: 0: 10982.6. Samples: 99453882. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:28,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:30,238][626795] Updated weights for policy 0, policy_version 170652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:32,029][626795] Updated weights for policy 0, policy_version 170662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:33,712][626795] Updated weights for policy 0, policy_version 170672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:33,975][24592] Fps is (10 sec: 45876.5, 60 sec: 42462.1, 300 sec: 42987.2). Total num frames: 1398145024. Throughput: 0: 10999.5. Samples: 99523878. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:33,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:35,541][626795] Updated weights for policy 0, policy_version 170682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:37,325][626795] Updated weights for policy 0, policy_version 170692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:38,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44373.3, 300 sec: 42987.2). Total num frames: 1398374400. Throughput: 0: 10987.0. Samples: 99593274. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:38,976][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:39,112][626795] Updated weights for policy 0, policy_version 170702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:40,883][626795] Updated weights for policy 0, policy_version 170712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:42,676][626795] Updated weights for policy 0, policy_version 170722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:46,235][24592] Fps is (10 sec: 36751.3, 60 sec: 42631.5, 300 sec: 42605.3). Total num frames: 1398595584. Throughput: 0: 10451.5. Samples: 99627924. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:46,238][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:47,022][626795] Updated weights for policy 0, policy_version 170732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:48,767][626795] Updated weights for policy 0, policy_version 170742 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:48,975][24592] Fps is (10 sec: 34406.3, 60 sec: 42325.3, 300 sec: 42598.4). Total num frames: 1398718464. Throughput: 0: 10293.5. Samples: 99666672. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:48,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:50,693][626795] Updated weights for policy 0, policy_version 170752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:52,342][626795] Updated weights for policy 0, policy_version 170762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:53,976][24592] Fps is (10 sec: 45506.9, 60 sec: 42461.6, 300 sec: 42626.1). Total num frames: 1398947840. Throughput: 0: 10863.3. Samples: 99735498. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:53,977][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:54,229][626795] Updated weights for policy 0, policy_version 170772 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:56,016][626795] Updated weights for policy 0, policy_version 170782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:57,728][626795] Updated weights for policy 0, policy_version 170792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:58,975][24592] Fps is (10 sec: 45875.4, 60 sec: 42325.5, 300 sec: 42598.4). Total num frames: 1399177216. Throughput: 0: 10928.7. Samples: 99770142. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:55:58,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:55:59,490][626795] Updated weights for policy 0, policy_version 170802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:01,201][626795] Updated weights for policy 0, policy_version 170812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:02,905][626795] Updated weights for policy 0, policy_version 170822 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:03,975][24592] Fps is (10 sec: 47515.2, 60 sec: 42598.4, 300 sec: 43042.7). Total num frames: 1399422976. Throughput: 0: 10944.9. Samples: 99840930. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:03,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:04,659][626795] Updated weights for policy 0, policy_version 170832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:06,348][626795] Updated weights for policy 0, policy_version 170842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:08,133][626795] Updated weights for policy 0, policy_version 170852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:08,975][24592] Fps is (10 sec: 47512.9, 60 sec: 44512.6, 300 sec: 43042.7). Total num frames: 1399652352. Throughput: 0: 10967.4. Samples: 99911736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:08,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:09,850][626795] Updated weights for policy 0, policy_version 170862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:11,654][626795] Updated weights for policy 0, policy_version 170872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:13,402][626795] Updated weights for policy 0, policy_version 170882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:13,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44509.9, 300 sec: 43042.7). Total num frames: 1399881728. Throughput: 0: 10946.9. Samples: 99946494. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:13,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:15,210][626795] Updated weights for policy 0, policy_version 170892 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:16,956][626795] Updated weights for policy 0, policy_version 170902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:21,116][24592] Fps is (10 sec: 36438.3, 60 sec: 42581.3, 300 sec: 42677.5). Total num frames: 1400094720. Throughput: 0: 10429.4. Samples: 100015524. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:21,117][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:21,386][626795] Updated weights for policy 0, policy_version 170912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:23,238][626795] Updated weights for policy 0, policy_version 170922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:23,975][24592] Fps is (10 sec: 34406.3, 60 sec: 42325.5, 300 sec: 42653.9). Total num frames: 1400225792. Throughput: 0: 10241.3. Samples: 100054134. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:23,978][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:24,973][626795] Updated weights for policy 0, policy_version 170932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:26,813][626795] Updated weights for policy 0, policy_version 170942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:28,562][626795] Updated weights for policy 0, policy_version 170952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:28,975][24592] Fps is (10 sec: 45860.0, 60 sec: 42325.3, 300 sec: 42653.9). Total num frames: 1400455168. Throughput: 0: 10779.4. Samples: 100088640. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:28,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:30,225][626795] Updated weights for policy 0, policy_version 170962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:32,126][626795] Updated weights for policy 0, policy_version 170972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:33,790][626795] Updated weights for policy 0, policy_version 170982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:33,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42325.3, 300 sec: 42653.9). Total num frames: 1400684544. Throughput: 0: 10922.8. Samples: 100158198. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:33,992][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:35,621][626795] Updated weights for policy 0, policy_version 170992 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:37,309][626795] Updated weights for policy 0, policy_version 171002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:38,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42461.8, 300 sec: 43070.5). Total num frames: 1400922112. Throughput: 0: 10963.0. Samples: 100228830. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:38,977][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:39,037][626795] Updated weights for policy 0, policy_version 171012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:40,770][626795] Updated weights for policy 0, policy_version 171022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:42,548][626795] Updated weights for policy 0, policy_version 171032 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44407.4, 300 sec: 43098.3). Total num frames: 1401159680. Throughput: 0: 10971.2. Samples: 100263846. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:43,977][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:44,355][626795] Updated weights for policy 0, policy_version 171042 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:46,083][626795] Updated weights for policy 0, policy_version 171052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:47,829][626795] Updated weights for policy 0, policy_version 171062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:48,975][24592] Fps is (10 sec: 46694.1, 60 sec: 44509.8, 300 sec: 43070.5). Total num frames: 1401389056. Throughput: 0: 10936.5. Samples: 100333074. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:48,977][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:49,661][626795] Updated weights for policy 0, policy_version 171072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:51,444][626795] Updated weights for policy 0, policy_version 171082 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:53,234][626795] Updated weights for policy 0, policy_version 171092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:55,878][24592] Fps is (10 sec: 35788.0, 60 sec: 42612.5, 300 sec: 42711.6). Total num frames: 1401585664. Throughput: 0: 9721.8. Samples: 100367718. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:55,880][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:57,435][626795] Updated weights for policy 0, policy_version 171102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:58,976][24592] Fps is (10 sec: 34405.0, 60 sec: 42598.0, 300 sec: 42709.4). Total num frames: 1401733120. Throughput: 0: 10261.0. Samples: 100408242. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:56:58,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:56:59,193][626795] Updated weights for policy 0, policy_version 171112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:01,032][626795] Updated weights for policy 0, policy_version 171122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:02,758][626795] Updated weights for policy 0, policy_version 171132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:03,975][24592] Fps is (10 sec: 46539.7, 60 sec: 42325.4, 300 sec: 42681.7). Total num frames: 1401962496. Throughput: 0: 10781.0. Samples: 100477596. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:03,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:04,003][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000171139_1401970688.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:04,095][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000169882_1391673344.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:04,572][626795] Updated weights for policy 0, policy_version 171142 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:06,333][626795] Updated weights for policy 0, policy_version 171152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:08,050][626795] Updated weights for policy 0, policy_version 171162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:08,975][24592] Fps is (10 sec: 46697.0, 60 sec: 42462.0, 300 sec: 42709.5). Total num frames: 1402200064. Throughput: 0: 10948.0. Samples: 100546794. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:08,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:09,906][626795] Updated weights for policy 0, policy_version 171172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:11,491][626795] Updated weights for policy 0, policy_version 171182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:13,393][626795] Updated weights for policy 0, policy_version 171192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:13,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42461.9, 300 sec: 43126.0). Total num frames: 1402429440. Throughput: 0: 10966.7. Samples: 100582140. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:13,976][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:15,198][626795] Updated weights for policy 0, policy_version 171202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:16,733][626795] Updated weights for policy 0, policy_version 171212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:18,575][626795] Updated weights for policy 0, policy_version 171222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:18,975][24592] Fps is (10 sec: 45874.8, 60 sec: 44315.7, 300 sec: 43098.3). Total num frames: 1402658816. Throughput: 0: 10981.3. Samples: 100652358. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:18,976][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:20,462][626795] Updated weights for policy 0, policy_version 171232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:22,045][626795] Updated weights for policy 0, policy_version 171242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:23,796][626795] Updated weights for policy 0, policy_version 171252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:23,976][24592] Fps is (10 sec: 46690.6, 60 sec: 44509.3, 300 sec: 43126.0). Total num frames: 1402896384. Throughput: 0: 10964.7. Samples: 100722252. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:23,977][24592] Avg episode reward: [(0, '4.280')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:25,624][626795] Updated weights for policy 0, policy_version 171262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:27,406][626795] Updated weights for policy 0, policy_version 171272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:30,684][24592] Fps is (10 sec: 36381.3, 60 sec: 42613.5, 300 sec: 42739.6). Total num frames: 1403084800. Throughput: 0: 10546.3. Samples: 100756452. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:30,685][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:31,825][626795] Updated weights for policy 0, policy_version 171282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:33,536][626795] Updated weights for policy 0, policy_version 171292 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:33,975][24592] Fps is (10 sec: 34409.3, 60 sec: 42598.4, 300 sec: 42737.3). Total num frames: 1403240448. Throughput: 0: 10300.6. Samples: 100796598. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:33,976][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:35,235][626795] Updated weights for policy 0, policy_version 171302 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:37,013][626795] Updated weights for policy 0, policy_version 171312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:38,837][626795] Updated weights for policy 0, policy_version 171322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:38,975][24592] Fps is (10 sec: 46438.3, 60 sec: 42461.9, 300 sec: 42709.5). Total num frames: 1403469824. Throughput: 0: 11566.2. Samples: 100866186. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:38,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:40,571][626795] Updated weights for policy 0, policy_version 171332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:42,330][626795] Updated weights for policy 0, policy_version 171342 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:43,976][24592] Fps is (10 sec: 46691.9, 60 sec: 42461.5, 300 sec: 42737.2). Total num frames: 1403707392. Throughput: 0: 10954.7. Samples: 100901202. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:43,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:44,095][626795] Updated weights for policy 0, policy_version 171352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:45,749][626795] Updated weights for policy 0, policy_version 171362 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:47,571][626795] Updated weights for policy 0, policy_version 171372 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:48,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42598.5, 300 sec: 43153.8). Total num frames: 1403944960. Throughput: 0: 10983.1. Samples: 100971834. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:48,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:49,325][626795] Updated weights for policy 0, policy_version 171382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:51,067][626795] Updated weights for policy 0, policy_version 171392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:52,804][626795] Updated weights for policy 0, policy_version 171402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:53,975][24592] Fps is (10 sec: 46696.9, 60 sec: 44557.8, 300 sec: 43181.6). Total num frames: 1404174336. Throughput: 0: 10996.1. Samples: 101041620. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:53,976][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:54,618][626795] Updated weights for policy 0, policy_version 171412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:56,328][626795] Updated weights for policy 0, policy_version 171422 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:58,094][626795] Updated weights for policy 0, policy_version 171432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:58,976][24592] Fps is (10 sec: 45874.0, 60 sec: 44510.1, 300 sec: 43154.0). Total num frames: 1404403712. Throughput: 0: 10980.3. Samples: 101076258. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:57:58,977][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:57:59,929][626795] Updated weights for policy 0, policy_version 171442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:01,695][626795] Updated weights for policy 0, policy_version 171452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:05,474][24592] Fps is (10 sec: 36335.5, 60 sec: 42759.5, 300 sec: 42825.2). Total num frames: 1404592128. Throughput: 0: 10604.2. Samples: 101145432. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:05,474][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:05,973][626795] Updated weights for policy 0, policy_version 171462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:07,787][626795] Updated weights for policy 0, policy_version 171472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:08,975][24592] Fps is (10 sec: 34407.1, 60 sec: 42461.8, 300 sec: 42792.8). Total num frames: 1404747776. Throughput: 0: 10288.6. Samples: 101185230. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:08,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:09,576][626795] Updated weights for policy 0, policy_version 171482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:11,327][626795] Updated weights for policy 0, policy_version 171492 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:13,076][626795] Updated weights for policy 0, policy_version 171502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:13,975][24592] Fps is (10 sec: 46250.6, 60 sec: 42598.4, 300 sec: 42820.6). Total num frames: 1404985344. Throughput: 0: 10714.5. Samples: 101220294. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:13,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:14,877][626795] Updated weights for policy 0, policy_version 171512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:16,607][626795] Updated weights for policy 0, policy_version 171522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:18,249][626795] Updated weights for policy 0, policy_version 171532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:18,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42735.0, 300 sec: 42848.5). Total num frames: 1405222912. Throughput: 0: 10973.1. Samples: 101290386. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:18,976][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:20,083][626795] Updated weights for policy 0, policy_version 171542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:21,764][626795] Updated weights for policy 0, policy_version 171552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:23,551][626795] Updated weights for policy 0, policy_version 171562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:23,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42599.0, 300 sec: 43209.3). Total num frames: 1405452288. Throughput: 0: 10995.2. Samples: 101360970. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:23,976][24592] Avg episode reward: [(0, '4.929')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:25,312][626795] Updated weights for policy 0, policy_version 171572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:27,066][626795] Updated weights for policy 0, policy_version 171582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:28,865][626795] Updated weights for policy 0, policy_version 171592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:28,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44690.5, 300 sec: 43237.1). Total num frames: 1405689856. Throughput: 0: 10981.9. Samples: 101395380. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:28,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:30,636][626795] Updated weights for policy 0, policy_version 171602 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:32,414][626795] Updated weights for policy 0, policy_version 171612 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:33,976][24592] Fps is (10 sec: 45871.5, 60 sec: 44509.2, 300 sec: 43209.2). Total num frames: 1405911040. Throughput: 0: 10952.5. Samples: 101464704. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:33,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:34,230][626795] Updated weights for policy 0, policy_version 171622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:35,989][626795] Updated weights for policy 0, policy_version 171632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:37,635][626795] Updated weights for policy 0, policy_version 171642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:40,273][24592] Fps is (10 sec: 35529.8, 60 sec: 42765.7, 300 sec: 42854.3). Total num frames: 1406091264. Throughput: 0: 9890.5. Samples: 101499528. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:40,274][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:42,036][626795] Updated weights for policy 0, policy_version 171652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:43,826][626795] Updated weights for policy 0, policy_version 171662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:43,976][24592] Fps is (10 sec: 35227.1, 60 sec: 42598.5, 300 sec: 42848.3). Total num frames: 1406263296. Throughput: 0: 10292.0. Samples: 101539398. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:43,977][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:45,584][626795] Updated weights for policy 0, policy_version 171672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:47,373][626795] Updated weights for policy 0, policy_version 171682 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:48,975][24592] Fps is (10 sec: 45185.6, 60 sec: 42325.3, 300 sec: 42792.8). Total num frames: 1406484480. Throughput: 0: 10660.8. Samples: 101609196. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:48,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:49,126][626795] Updated weights for policy 0, policy_version 171692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:50,970][626795] Updated weights for policy 0, policy_version 171702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:52,689][626795] Updated weights for policy 0, policy_version 171712 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:53,975][24592] Fps is (10 sec: 45877.2, 60 sec: 42461.9, 300 sec: 42820.6). Total num frames: 1406722048. Throughput: 0: 10954.8. Samples: 101678196. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:53,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:54,409][626795] Updated weights for policy 0, policy_version 171722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:56,172][626795] Updated weights for policy 0, policy_version 171732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:57,870][626795] Updated weights for policy 0, policy_version 171742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:58,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42598.6, 300 sec: 43237.1). Total num frames: 1406959616. Throughput: 0: 10952.7. Samples: 101713164. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:58:58,976][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:58:59,639][626795] Updated weights for policy 0, policy_version 171752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:01,480][626795] Updated weights for policy 0, policy_version 171762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:03,270][626795] Updated weights for policy 0, policy_version 171772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:03,976][24592] Fps is (10 sec: 45873.5, 60 sec: 44249.2, 300 sec: 43209.3). Total num frames: 1407180800. Throughput: 0: 10945.0. Samples: 101782914. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:03,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000171775_1407180800.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:04,068][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000170516_1396867072.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:04,974][626795] Updated weights for policy 0, policy_version 171782 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:06,858][626795] Updated weights for policy 0, policy_version 171792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:07,062][626772] Signal inference workers to stop experience collection... (1450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:07,063][626772] Signal inference workers to resume experience collection... (1450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:07,069][626795] InferenceWorker_p0-w0: stopping experience collection (1450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:07,073][626795] InferenceWorker_p0-w0: resuming experience collection (1450 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:08,699][626795] Updated weights for policy 0, policy_version 171802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:08,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44509.9, 300 sec: 43237.1). Total num frames: 1407418368. Throughput: 0: 10893.5. Samples: 101851176. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:08,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:10,469][626795] Updated weights for policy 0, policy_version 171812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:12,285][626795] Updated weights for policy 0, policy_version 171822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:15,071][24592] Fps is (10 sec: 34700.3, 60 sec: 42236.5, 300 sec: 42800.4). Total num frames: 1407565824. Throughput: 0: 10641.5. Samples: 101885910. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:15,072][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:16,536][626795] Updated weights for policy 0, policy_version 171832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:18,223][626795] Updated weights for policy 0, policy_version 171842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:18,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42188.8, 300 sec: 42792.8). Total num frames: 1407754240. Throughput: 0: 10232.3. Samples: 101925150. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:18,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:20,027][626795] Updated weights for policy 0, policy_version 171852 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:21,874][626795] Updated weights for policy 0, policy_version 171862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:23,649][626795] Updated weights for policy 0, policy_version 171872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:23,977][24592] Fps is (10 sec: 46918.5, 60 sec: 42188.3, 300 sec: 42792.8). Total num frames: 1407983616. Throughput: 0: 11323.5. Samples: 101994396. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:23,979][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:25,481][626795] Updated weights for policy 0, policy_version 171882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:27,164][626795] Updated weights for policy 0, policy_version 171892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:28,920][626795] Updated weights for policy 0, policy_version 171902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:28,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42188.7, 300 sec: 42792.8). Total num frames: 1408221184. Throughput: 0: 10885.0. Samples: 102029220. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:28,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:30,695][626795] Updated weights for policy 0, policy_version 171912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:32,528][626795] Updated weights for policy 0, policy_version 171922 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:33,982][24592] Fps is (10 sec: 47484.8, 60 sec: 42457.7, 300 sec: 43208.3). Total num frames: 1408458752. Throughput: 0: 10884.1. Samples: 102099054. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:33,983][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:34,177][626795] Updated weights for policy 0, policy_version 171932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:36,025][626795] Updated weights for policy 0, policy_version 171942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:37,795][626795] Updated weights for policy 0, policy_version 171952 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:38,976][24592] Fps is (10 sec: 45874.5, 60 sec: 44098.2, 300 sec: 43181.6). Total num frames: 1408679936. Throughput: 0: 10885.4. Samples: 102168042. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:38,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:39,651][626795] Updated weights for policy 0, policy_version 171962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:41,405][626795] Updated weights for policy 0, policy_version 171972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:43,102][626795] Updated weights for policy 0, policy_version 171982 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:43,975][24592] Fps is (10 sec: 45086.5, 60 sec: 44100.6, 300 sec: 43153.8). Total num frames: 1408909312. Throughput: 0: 10863.5. Samples: 102202020. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:43,978][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:45,006][626795] Updated weights for policy 0, policy_version 171992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:46,747][626795] Updated weights for policy 0, policy_version 172002 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:49,913][24592] Fps is (10 sec: 34454.6, 60 sec: 42212.0, 300 sec: 42768.0). Total num frames: 1409056768. Throughput: 0: 9871.5. Samples: 102236382. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:49,914][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:51,255][626795] Updated weights for policy 0, policy_version 172012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:52,900][626795] Updated weights for policy 0, policy_version 172022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:53,975][24592] Fps is (10 sec: 33587.1, 60 sec: 42052.2, 300 sec: 42737.3). Total num frames: 1409245184. Throughput: 0: 10193.9. Samples: 102309900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:53,976][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:54,832][626795] Updated weights for policy 0, policy_version 172032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:56,565][626795] Updated weights for policy 0, policy_version 172042 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 10:59:58,338][626795] Updated weights for policy 0, policy_version 172052 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:58,975][24592] Fps is (10 sec: 46100.2, 60 sec: 41915.7, 300 sec: 42737.2). Total num frames: 1409474560. Throughput: 0: 10427.0. Samples: 102343698. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 10:59:58,976][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:00,163][626795] Updated weights for policy 0, policy_version 172062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:01,829][626795] Updated weights for policy 0, policy_version 172072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:03,691][626795] Updated weights for policy 0, policy_version 172082 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:03,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42052.6, 300 sec: 43115.2). Total num frames: 1409703936. Throughput: 0: 10837.6. Samples: 102412842. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:03,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:05,488][626795] Updated weights for policy 0, policy_version 172092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:07,126][626795] Updated weights for policy 0, policy_version 172102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:08,881][626795] Updated weights for policy 0, policy_version 172112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:08,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42052.3, 300 sec: 43153.8). Total num frames: 1409941504. Throughput: 0: 10853.4. Samples: 102482790. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:08,976][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:10,777][626795] Updated weights for policy 0, policy_version 172122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:12,389][626795] Updated weights for policy 0, policy_version 172132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:13,976][24592] Fps is (10 sec: 46693.4, 60 sec: 44225.4, 300 sec: 43126.0). Total num frames: 1410170880. Throughput: 0: 10853.8. Samples: 102517644. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:13,978][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:14,198][626795] Updated weights for policy 0, policy_version 172142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:16,037][626795] Updated weights for policy 0, policy_version 172152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:17,841][626795] Updated weights for policy 0, policy_version 172162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44100.3, 300 sec: 43098.3). Total num frames: 1410400256. Throughput: 0: 10836.3. Samples: 102586614. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:18,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:19,554][626795] Updated weights for policy 0, policy_version 172172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:21,388][626795] Updated weights for policy 0, policy_version 172182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:24,740][24592] Fps is (10 sec: 35007.1, 60 sec: 42197.7, 300 sec: 42709.9). Total num frames: 1410547712. Throughput: 0: 9905.9. Samples: 102621378. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:24,742][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:25,714][626795] Updated weights for policy 0, policy_version 172192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:27,465][626795] Updated weights for policy 0, policy_version 172202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:28,975][24592] Fps is (10 sec: 34406.5, 60 sec: 42052.3, 300 sec: 42709.5). Total num frames: 1410744320. Throughput: 0: 10181.3. Samples: 102660180. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:28,977][24592] Avg episode reward: [(0, '4.412')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:29,334][626795] Updated weights for policy 0, policy_version 172212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:31,191][626795] Updated weights for policy 0, policy_version 172222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:32,956][626795] Updated weights for policy 0, policy_version 172232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:33,975][24592] Fps is (10 sec: 46125.2, 60 sec: 41920.5, 300 sec: 42709.5). Total num frames: 1410973696. Throughput: 0: 11176.3. Samples: 102728838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:33,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:34,587][626795] Updated weights for policy 0, policy_version 172242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:36,421][626795] Updated weights for policy 0, policy_version 172252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:38,155][626795] Updated weights for policy 0, policy_version 172262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:38,975][24592] Fps is (10 sec: 45874.7, 60 sec: 42052.4, 300 sec: 43067.1). Total num frames: 1411203072. Throughput: 0: 10863.6. Samples: 102798762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:38,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:39,915][626795] Updated weights for policy 0, policy_version 172272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:41,703][626795] Updated weights for policy 0, policy_version 172282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:43,456][626795] Updated weights for policy 0, policy_version 172292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:43,975][24592] Fps is (10 sec: 45874.5, 60 sec: 42052.2, 300 sec: 43098.2). Total num frames: 1411432448. Throughput: 0: 10890.4. Samples: 102833766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:43,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:45,178][626795] Updated weights for policy 0, policy_version 172302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:47,029][626795] Updated weights for policy 0, policy_version 172312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:48,799][626795] Updated weights for policy 0, policy_version 172322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:48,975][24592] Fps is (10 sec: 45875.0, 60 sec: 44106.5, 300 sec: 43098.3). Total num frames: 1411661824. Throughput: 0: 10886.8. Samples: 102902748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:48,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:50,605][626795] Updated weights for policy 0, policy_version 172332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:52,492][626795] Updated weights for policy 0, policy_version 172342 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:53,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44100.2, 300 sec: 43098.2). Total num frames: 1411891200. Throughput: 0: 10852.0. Samples: 102971130. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:53,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:54,252][626795] Updated weights for policy 0, policy_version 172352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:00:55,985][626795] Updated weights for policy 0, policy_version 172362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:59,573][24592] Fps is (10 sec: 34784.2, 60 sec: 42178.0, 300 sec: 42650.8). Total num frames: 1412030464. Throughput: 0: 10694.2. Samples: 103005276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:00:59,575][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:00,366][626795] Updated weights for policy 0, policy_version 172372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:02,165][626795] Updated weights for policy 0, policy_version 172382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:03,955][626795] Updated weights for policy 0, policy_version 172392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:03,975][24592] Fps is (10 sec: 34406.7, 60 sec: 42188.7, 300 sec: 42654.0). Total num frames: 1412235264. Throughput: 0: 10175.6. Samples: 103044516. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:03,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000172392_1412235264.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:04,032][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000171139_1401970688.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:05,731][626795] Updated weights for policy 0, policy_version 172402 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:07,473][626795] Updated weights for policy 0, policy_version 172412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:08,975][24592] Fps is (10 sec: 46178.9, 60 sec: 42052.2, 300 sec: 42653.9). Total num frames: 1412464640. Throughput: 0: 11122.2. Samples: 103113372. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:08,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:09,296][626795] Updated weights for policy 0, policy_version 172422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:11,034][626795] Updated weights for policy 0, policy_version 172432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:12,783][626795] Updated weights for policy 0, policy_version 172442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42052.4, 300 sec: 43021.6). Total num frames: 1412694016. Throughput: 0: 10854.1. Samples: 103148616. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:13,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:14,544][626795] Updated weights for policy 0, policy_version 172452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:16,316][626795] Updated weights for policy 0, policy_version 172462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:18,057][626795] Updated weights for policy 0, policy_version 172472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:18,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42052.3, 300 sec: 43042.7). Total num frames: 1412923392. Throughput: 0: 10882.5. Samples: 103218552. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:18,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:19,851][626795] Updated weights for policy 0, policy_version 172482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:21,673][626795] Updated weights for policy 0, policy_version 172492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:23,430][626795] Updated weights for policy 0, policy_version 172502 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:23,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44116.3, 300 sec: 43070.5). Total num frames: 1413160960. Throughput: 0: 10860.1. Samples: 103287468. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:23,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:25,229][626795] Updated weights for policy 0, policy_version 172512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:26,974][626795] Updated weights for policy 0, policy_version 172522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:28,706][626795] Updated weights for policy 0, policy_version 172532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:28,977][24592] Fps is (10 sec: 46689.0, 60 sec: 44099.4, 300 sec: 43070.3). Total num frames: 1413390336. Throughput: 0: 10844.4. Samples: 103321776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:28,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:30,541][626795] Updated weights for policy 0, policy_version 172542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:34,411][24592] Fps is (10 sec: 34538.6, 60 sec: 42155.4, 300 sec: 42646.4). Total num frames: 1413521408. Throughput: 0: 10541.5. Samples: 103381710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:34,413][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:34,951][626795] Updated weights for policy 0, policy_version 172552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:36,755][626795] Updated weights for policy 0, policy_version 172562 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:38,517][626795] Updated weights for policy 0, policy_version 172572 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:38,976][24592] Fps is (10 sec: 33590.3, 60 sec: 42052.2, 300 sec: 42598.4). Total num frames: 1413726208. Throughput: 0: 10194.6. Samples: 103429890. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:38,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:40,308][626795] Updated weights for policy 0, policy_version 172582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:42,119][626795] Updated weights for policy 0, policy_version 172592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:43,882][626795] Updated weights for policy 0, policy_version 172602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:43,975][24592] Fps is (10 sec: 45397.5, 60 sec: 42052.4, 300 sec: 42598.4). Total num frames: 1413955584. Throughput: 0: 10324.7. Samples: 103463712. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:43,979][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:45,587][626795] Updated weights for policy 0, policy_version 172612 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:47,341][626795] Updated weights for policy 0, policy_version 172622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:48,975][24592] Fps is (10 sec: 46695.6, 60 sec: 42188.9, 300 sec: 43014.7). Total num frames: 1414193152. Throughput: 0: 10877.3. Samples: 103533996. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:49,005][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:49,079][626795] Updated weights for policy 0, policy_version 172632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:50,874][626795] Updated weights for policy 0, policy_version 172642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:52,672][626795] Updated weights for policy 0, policy_version 172652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:53,976][24592] Fps is (10 sec: 46693.8, 60 sec: 42188.8, 300 sec: 43015.0). Total num frames: 1414422528. Throughput: 0: 10903.6. Samples: 103604034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:53,978][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:54,349][626795] Updated weights for policy 0, policy_version 172662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:56,153][626795] Updated weights for policy 0, policy_version 172672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:57,929][626795] Updated weights for policy 0, policy_version 172682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:58,976][24592] Fps is (10 sec: 46693.3, 60 sec: 44268.3, 300 sec: 43042.7). Total num frames: 1414660096. Throughput: 0: 10876.7. Samples: 103638072. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:01:58,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:01:59,693][626795] Updated weights for policy 0, policy_version 172692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:01,555][626795] Updated weights for policy 0, policy_version 172702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:03,195][626795] Updated weights for policy 0, policy_version 172712 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:03,975][24592] Fps is (10 sec: 46695.1, 60 sec: 44236.8, 300 sec: 43014.9). Total num frames: 1414889472. Throughput: 0: 10876.3. Samples: 103707984. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:03,976][24592] Avg episode reward: [(0, '4.972')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:05,030][626795] Updated weights for policy 0, policy_version 172722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:09,260][24592] Fps is (10 sec: 34250.0, 60 sec: 42261.1, 300 sec: 42612.8). Total num frames: 1415012352. Throughput: 0: 10031.0. Samples: 103741722. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:09,262][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:09,505][626795] Updated weights for policy 0, policy_version 172732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:11,190][626795] Updated weights for policy 0, policy_version 172742 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:13,004][626795] Updated weights for policy 0, policy_version 172752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:13,976][24592] Fps is (10 sec: 33584.4, 60 sec: 42188.2, 300 sec: 42598.3). Total num frames: 1415225344. Throughput: 0: 10207.3. Samples: 103781100. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:13,978][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:14,789][626795] Updated weights for policy 0, policy_version 172762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:16,553][626795] Updated weights for policy 0, policy_version 172772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:18,251][626795] Updated weights for policy 0, policy_version 172782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:18,975][24592] Fps is (10 sec: 45534.9, 60 sec: 42188.8, 300 sec: 42570.8). Total num frames: 1415454720. Throughput: 0: 10515.7. Samples: 103850328. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:18,976][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:20,101][626795] Updated weights for policy 0, policy_version 172792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:21,836][626795] Updated weights for policy 0, policy_version 172802 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:23,628][626795] Updated weights for policy 0, policy_version 172812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:23,975][24592] Fps is (10 sec: 46698.3, 60 sec: 42188.8, 300 sec: 42986.3). Total num frames: 1415692288. Throughput: 0: 10904.6. Samples: 103920594. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:23,977][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:25,413][626795] Updated weights for policy 0, policy_version 172822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:26,982][626795] Updated weights for policy 0, policy_version 172832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:28,872][626795] Updated weights for policy 0, policy_version 172842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:28,976][24592] Fps is (10 sec: 46693.2, 60 sec: 42189.5, 300 sec: 42987.1). Total num frames: 1415921664. Throughput: 0: 10934.2. Samples: 103955754. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:28,977][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:30,640][626795] Updated weights for policy 0, policy_version 172852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:32,380][626795] Updated weights for policy 0, policy_version 172862 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:33,993][24592] Fps is (10 sec: 45792.5, 60 sec: 44134.8, 300 sec: 42984.5). Total num frames: 1416151040. Throughput: 0: 10897.8. Samples: 104024592. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:33,996][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:34,270][626795] Updated weights for policy 0, policy_version 172872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:35,995][626795] Updated weights for policy 0, policy_version 172882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:37,732][626795] Updated weights for policy 0, policy_version 172892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:38,976][24592] Fps is (10 sec: 46694.6, 60 sec: 44373.4, 300 sec: 42987.2). Total num frames: 1416388608. Throughput: 0: 10896.0. Samples: 104094354. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:38,976][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:39,528][626795] Updated weights for policy 0, policy_version 172902 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:41,303][626795] Updated weights for policy 0, policy_version 172912 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:44,112][24592] Fps is (10 sec: 34813.4, 60 sec: 42365.5, 300 sec: 42550.9). Total num frames: 1416503296. Throughput: 0: 10109.0. Samples: 104094354. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:44,112][24592] Avg episode reward: [(0, '4.961')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:45,731][626795] Updated weights for policy 0, policy_version 172922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:47,475][626795] Updated weights for policy 0, policy_version 172932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:48,975][24592] Fps is (10 sec: 33587.9, 60 sec: 42188.8, 300 sec: 42542.9). Total num frames: 1416724480. Throughput: 0: 10200.8. Samples: 104167020. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:48,977][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:49,409][626795] Updated weights for policy 0, policy_version 172942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:51,019][626795] Updated weights for policy 0, policy_version 172952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:52,941][626795] Updated weights for policy 0, policy_version 172962 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:53,975][24592] Fps is (10 sec: 44848.9, 60 sec: 42052.4, 300 sec: 42515.1). Total num frames: 1416945664. Throughput: 0: 11041.4. Samples: 104235438. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:53,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:54,703][626795] Updated weights for policy 0, policy_version 172972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:56,465][626795] Updated weights for policy 0, policy_version 172982 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:58,158][626795] Updated weights for policy 0, policy_version 172992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:58,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42052.5, 300 sec: 42899.6). Total num frames: 1417183232. Throughput: 0: 10867.1. Samples: 104270112. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:02:58,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:02:59,874][626795] Updated weights for policy 0, policy_version 173002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:01,565][626795] Updated weights for policy 0, policy_version 173012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:03,365][626795] Updated weights for policy 0, policy_version 173022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:03,976][24592] Fps is (10 sec: 47511.3, 60 sec: 42188.5, 300 sec: 42959.3). Total num frames: 1417420800. Throughput: 0: 10922.7. Samples: 104341854. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:03,978][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000173025_1417420800.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000171775_1407180800.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:05,091][626795] Updated weights for policy 0, policy_version 173032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:06,812][626795] Updated weights for policy 0, policy_version 173042 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:08,672][626795] Updated weights for policy 0, policy_version 173052 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:08,976][24592] Fps is (10 sec: 47512.7, 60 sec: 44310.7, 300 sec: 42959.4). Total num frames: 1417658368. Throughput: 0: 10913.6. Samples: 104411706. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:08,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:10,475][626795] Updated weights for policy 0, policy_version 173062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:12,109][626795] Updated weights for policy 0, policy_version 173072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:13,914][626795] Updated weights for policy 0, policy_version 173082 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:13,976][24592] Fps is (10 sec: 46696.0, 60 sec: 44373.8, 300 sec: 42931.6). Total num frames: 1417887744. Throughput: 0: 10899.8. Samples: 104446242. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:13,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:15,754][626795] Updated weights for policy 0, policy_version 173092 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:19,089][24592] Fps is (10 sec: 33210.4, 60 sec: 42245.3, 300 sec: 42498.7). Total num frames: 1417994240. Throughput: 0: 10111.3. Samples: 104480568. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:19,091][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:20,273][626795] Updated weights for policy 0, policy_version 173102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:21,990][626795] Updated weights for policy 0, policy_version 173112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:23,911][626795] Updated weights for policy 0, policy_version 173122 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:23,975][24592] Fps is (10 sec: 33587.7, 60 sec: 42188.8, 300 sec: 42487.3). Total num frames: 1418223616. Throughput: 0: 10175.1. Samples: 104552232. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:23,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:25,700][626795] Updated weights for policy 0, policy_version 173132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:27,380][626795] Updated weights for policy 0, policy_version 173142 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:28,976][24592] Fps is (10 sec: 45573.4, 60 sec: 42052.4, 300 sec: 42487.4). Total num frames: 1418444800. Throughput: 0: 10973.0. Samples: 104586642. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:28,978][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:29,071][626795] Updated weights for policy 0, policy_version 173152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:30,914][626795] Updated weights for policy 0, policy_version 173162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:32,575][626795] Updated weights for policy 0, policy_version 173172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:33,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42338.1, 300 sec: 42898.2). Total num frames: 1418690560. Throughput: 0: 10906.5. Samples: 104657814. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:33,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:34,287][626795] Updated weights for policy 0, policy_version 173182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:36,031][626795] Updated weights for policy 0, policy_version 173192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:37,711][626795] Updated weights for policy 0, policy_version 173202 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:38,976][24592] Fps is (10 sec: 48332.2, 60 sec: 42325.3, 300 sec: 42931.7). Total num frames: 1418928128. Throughput: 0: 10975.1. Samples: 104729322. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:38,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:39,519][626795] Updated weights for policy 0, policy_version 173212 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:41,276][626795] Updated weights for policy 0, policy_version 173222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:42,899][626795] Updated weights for policy 0, policy_version 173232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:43,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44337.6, 300 sec: 42959.4). Total num frames: 1419157504. Throughput: 0: 10985.3. Samples: 104764452. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:43,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:44,712][626795] Updated weights for policy 0, policy_version 173242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:46,432][626795] Updated weights for policy 0, policy_version 173252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:48,335][626795] Updated weights for policy 0, policy_version 173262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:48,975][24592] Fps is (10 sec: 45876.0, 60 sec: 44373.3, 300 sec: 42931.6). Total num frames: 1419386880. Throughput: 0: 10934.0. Samples: 104833878. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:48,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:49,988][626795] Updated weights for policy 0, policy_version 173272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:54,190][24592] Fps is (10 sec: 33683.9, 60 sec: 42446.6, 300 sec: 42484.2). Total num frames: 1419501568. Throughput: 0: 10092.7. Samples: 104868042. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:54,193][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:54,757][626795] Updated weights for policy 0, policy_version 173282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:56,522][626795] Updated weights for policy 0, policy_version 173292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:03:58,458][626795] Updated weights for policy 0, policy_version 173302 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:58,976][24592] Fps is (10 sec: 31947.6, 60 sec: 42051.9, 300 sec: 42459.5). Total num frames: 1419706368. Throughput: 0: 10174.3. Samples: 104904090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:03:58,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:00,212][626795] Updated weights for policy 0, policy_version 173312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:02,001][626795] Updated weights for policy 0, policy_version 173322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:03,786][626795] Updated weights for policy 0, policy_version 173332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:03,975][24592] Fps is (10 sec: 44369.4, 60 sec: 41916.1, 300 sec: 42431.8). Total num frames: 1419935744. Throughput: 0: 10936.3. Samples: 104971458. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:03,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:05,495][626795] Updated weights for policy 0, policy_version 173342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:07,179][626795] Updated weights for policy 0, policy_version 173352 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:08,975][24592] Fps is (10 sec: 46696.5, 60 sec: 41915.9, 300 sec: 42896.6). Total num frames: 1420173312. Throughput: 0: 10885.7. Samples: 105042090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:08,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:09,023][626795] Updated weights for policy 0, policy_version 173362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:10,681][626795] Updated weights for policy 0, policy_version 173372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:12,351][626795] Updated weights for policy 0, policy_version 173382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:13,976][24592] Fps is (10 sec: 47512.7, 60 sec: 42052.2, 300 sec: 42903.8). Total num frames: 1420410880. Throughput: 0: 10920.6. Samples: 105078072. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:13,977][24592] Avg episode reward: [(0, '5.023')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:14,138][626795] Updated weights for policy 0, policy_version 173392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:15,799][626795] Updated weights for policy 0, policy_version 173402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:17,588][626795] Updated weights for policy 0, policy_version 173412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,295][626772] Signal inference workers to stop experience collection... (1500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,301][626772] Signal inference workers to resume experience collection... (1500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,308][626795] InferenceWorker_p0-w0: stopping experience collection (1500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,311][626795] InferenceWorker_p0-w0: resuming experience collection (1500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44457.6, 300 sec: 42959.5). Total num frames: 1420656640. Throughput: 0: 10916.1. Samples: 105149040. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:18,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:19,402][626795] Updated weights for policy 0, policy_version 173422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:21,088][626795] Updated weights for policy 0, policy_version 173432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:22,843][626795] Updated weights for policy 0, policy_version 173442 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:23,975][24592] Fps is (10 sec: 46695.1, 60 sec: 44236.8, 300 sec: 42903.9). Total num frames: 1420877824. Throughput: 0: 10864.6. Samples: 105218226. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:23,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:24,657][626795] Updated weights for policy 0, policy_version 173452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:26,403][626795] Updated weights for policy 0, policy_version 173462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:29,230][24592] Fps is (10 sec: 33551.9, 60 sec: 42418.4, 300 sec: 42479.4). Total num frames: 1421000704. Throughput: 0: 10027.1. Samples: 105218226. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:29,233][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:31,067][626795] Updated weights for policy 0, policy_version 173472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:32,942][626795] Updated weights for policy 0, policy_version 173482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:33,975][24592] Fps is (10 sec: 32768.1, 60 sec: 41915.7, 300 sec: 42459.6). Total num frames: 1421205504. Throughput: 0: 10106.5. Samples: 105288672. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:33,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:34,785][626795] Updated weights for policy 0, policy_version 173492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:36,582][626795] Updated weights for policy 0, policy_version 173502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:38,427][626795] Updated weights for policy 0, policy_version 173512 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:38,975][24592] Fps is (10 sec: 44551.7, 60 sec: 41779.3, 300 sec: 42459.5). Total num frames: 1421434880. Throughput: 0: 10894.7. Samples: 105355968. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:38,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:40,142][626795] Updated weights for policy 0, policy_version 173522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:41,918][626795] Updated weights for policy 0, policy_version 173532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:43,601][626795] Updated weights for policy 0, policy_version 173542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:43,976][24592] Fps is (10 sec: 46692.6, 60 sec: 41915.5, 300 sec: 42901.3). Total num frames: 1421672448. Throughput: 0: 10829.6. Samples: 105391422. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:43,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:45,307][626795] Updated weights for policy 0, policy_version 173552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:47,091][626795] Updated weights for policy 0, policy_version 173562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:48,788][626795] Updated weights for policy 0, policy_version 173572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:48,975][24592] Fps is (10 sec: 47514.1, 60 sec: 42052.3, 300 sec: 42931.6). Total num frames: 1421910016. Throughput: 0: 10910.3. Samples: 105462420. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:48,976][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:50,515][626795] Updated weights for policy 0, policy_version 173582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:52,252][626795] Updated weights for policy 0, policy_version 173592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:53,975][24592] Fps is (10 sec: 46696.4, 60 sec: 44121.5, 300 sec: 42931.6). Total num frames: 1422139392. Throughput: 0: 10913.7. Samples: 105533208. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:53,976][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:54,016][626795] Updated weights for policy 0, policy_version 173602 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:55,739][626795] Updated weights for policy 0, policy_version 173612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:57,578][626795] Updated weights for policy 0, policy_version 173622 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44510.2, 300 sec: 42959.4). Total num frames: 1422376960. Throughput: 0: 10885.5. Samples: 105567918. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:04:58,976][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:04:59,383][626795] Updated weights for policy 0, policy_version 173632 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:01,147][626795] Updated weights for policy 0, policy_version 173642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:04,258][24592] Fps is (10 sec: 33459.7, 60 sec: 42262.6, 300 sec: 42474.4). Total num frames: 1422483456. Throughput: 0: 10014.9. Samples: 105602544. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:04,260][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:04,275][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000173644_1422491648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:04,356][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000172392_1412235264.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:05,737][626795] Updated weights for policy 0, policy_version 173652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:07,642][626795] Updated weights for policy 0, policy_version 173662 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:08,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42052.2, 300 sec: 42459.6). Total num frames: 1422696448. Throughput: 0: 10095.6. Samples: 105672528. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:08,976][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:09,337][626795] Updated weights for policy 0, policy_version 173672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:11,263][626795] Updated weights for policy 0, policy_version 173682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:13,025][626795] Updated weights for policy 0, policy_version 173692 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:13,975][24592] Fps is (10 sec: 45524.7, 60 sec: 41915.9, 300 sec: 42459.6). Total num frames: 1422925824. Throughput: 0: 10906.4. Samples: 105706236. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:13,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:14,711][626795] Updated weights for policy 0, policy_version 173702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:16,448][626795] Updated weights for policy 0, policy_version 173712 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:18,185][626795] Updated weights for policy 0, policy_version 173722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:18,975][24592] Fps is (10 sec: 46694.5, 60 sec: 41779.2, 300 sec: 42876.2). Total num frames: 1423163392. Throughput: 0: 10851.6. Samples: 105776994. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:18,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:19,962][626795] Updated weights for policy 0, policy_version 173732 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:21,743][626795] Updated weights for policy 0, policy_version 173742 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:23,449][626795] Updated weights for policy 0, policy_version 173752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:23,976][24592] Fps is (10 sec: 46692.1, 60 sec: 41915.4, 300 sec: 42876.0). Total num frames: 1423392768. Throughput: 0: 10910.3. Samples: 105846936. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:23,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:25,268][626795] Updated weights for policy 0, policy_version 173762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:26,862][626795] Updated weights for policy 0, policy_version 173772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:28,594][626795] Updated weights for policy 0, policy_version 173782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:28,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44151.1, 300 sec: 42931.6). Total num frames: 1423638528. Throughput: 0: 10922.1. Samples: 105882912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:28,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:30,335][626795] Updated weights for policy 0, policy_version 173792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:32,003][626795] Updated weights for policy 0, policy_version 173802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:33,892][626795] Updated weights for policy 0, policy_version 173812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:33,975][24592] Fps is (10 sec: 47515.6, 60 sec: 44373.3, 300 sec: 42931.6). Total num frames: 1423867904. Throughput: 0: 10910.8. Samples: 105953406. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:33,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:35,626][626795] Updated weights for policy 0, policy_version 173822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:39,324][24592] Fps is (10 sec: 33248.7, 60 sec: 42216.9, 300 sec: 42492.7). Total num frames: 1423982592. Throughput: 0: 10032.1. Samples: 105988146. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:39,325][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:40,248][626795] Updated weights for policy 0, policy_version 173832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:42,203][626795] Updated weights for policy 0, policy_version 173842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:43,964][626795] Updated weights for policy 0, policy_version 173852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:43,975][24592] Fps is (10 sec: 32768.2, 60 sec: 42052.5, 300 sec: 42487.3). Total num frames: 1424195584. Throughput: 0: 10138.1. Samples: 106024134. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:43,976][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:45,797][626795] Updated weights for policy 0, policy_version 173862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:47,726][626795] Updated weights for policy 0, policy_version 173872 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:48,975][24592] Fps is (10 sec: 44984.1, 60 sec: 41779.2, 300 sec: 42459.6). Total num frames: 1424416768. Throughput: 0: 10911.1. Samples: 106090458. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:48,976][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:49,378][626795] Updated weights for policy 0, policy_version 173882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:51,215][626795] Updated weights for policy 0, policy_version 173892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:52,896][626795] Updated weights for policy 0, policy_version 173902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:53,975][24592] Fps is (10 sec: 45055.8, 60 sec: 41779.1, 300 sec: 42851.9). Total num frames: 1424646144. Throughput: 0: 10831.6. Samples: 106159950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:53,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:54,747][626795] Updated weights for policy 0, policy_version 173912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:56,421][626795] Updated weights for policy 0, policy_version 173922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:58,173][626795] Updated weights for policy 0, policy_version 173932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:58,975][24592] Fps is (10 sec: 46693.9, 60 sec: 41779.1, 300 sec: 42876.1). Total num frames: 1424883712. Throughput: 0: 10873.6. Samples: 106195548. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:05:58,977][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:05:59,814][626795] Updated weights for policy 0, policy_version 173942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:01,670][626795] Updated weights for policy 0, policy_version 173952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:03,318][626795] Updated weights for policy 0, policy_version 173962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:03,976][24592] Fps is (10 sec: 47513.7, 60 sec: 44172.0, 300 sec: 42903.9). Total num frames: 1425121280. Throughput: 0: 10892.5. Samples: 106267158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:03,978][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:05,078][626795] Updated weights for policy 0, policy_version 173972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:06,804][626795] Updated weights for policy 0, policy_version 173982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:08,589][626795] Updated weights for policy 0, policy_version 173992 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:08,976][24592] Fps is (10 sec: 47512.2, 60 sec: 44373.1, 300 sec: 42931.6). Total num frames: 1425358848. Throughput: 0: 10897.9. Samples: 106337340. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:08,977][24592] Avg episode reward: [(0, '4.963')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:10,315][626795] Updated weights for policy 0, policy_version 174002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:14,416][24592] Fps is (10 sec: 34523.9, 60 sec: 42287.9, 300 sec: 42507.2). Total num frames: 1425481728. Throughput: 0: 10000.5. Samples: 106337340. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:14,418][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:14,924][626795] Updated weights for policy 0, policy_version 174012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:16,790][626795] Updated weights for policy 0, policy_version 174022 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:18,658][626795] Updated weights for policy 0, policy_version 174032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:18,975][24592] Fps is (10 sec: 31950.0, 60 sec: 41915.7, 300 sec: 42431.8). Total num frames: 1425678336. Throughput: 0: 10082.7. Samples: 106407126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:18,976][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:20,472][626795] Updated weights for policy 0, policy_version 174042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:22,373][626795] Updated weights for policy 0, policy_version 174052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:23,975][24592] Fps is (10 sec: 44561.4, 60 sec: 41916.0, 300 sec: 42431.9). Total num frames: 1425907712. Throughput: 0: 10895.5. Samples: 106474650. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:23,977][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:24,118][626795] Updated weights for policy 0, policy_version 174062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:25,846][626795] Updated weights for policy 0, policy_version 174072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:27,460][626795] Updated weights for policy 0, policy_version 174082 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:28,975][24592] Fps is (10 sec: 46694.1, 60 sec: 41779.2, 300 sec: 42856.1). Total num frames: 1426145280. Throughput: 0: 10802.8. Samples: 106510260. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:28,977][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:29,196][626795] Updated weights for policy 0, policy_version 174092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:30,961][626795] Updated weights for policy 0, policy_version 174102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:32,633][626795] Updated weights for policy 0, policy_version 174112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:33,976][24592] Fps is (10 sec: 47512.6, 60 sec: 41915.6, 300 sec: 42903.9). Total num frames: 1426382848. Throughput: 0: 10938.6. Samples: 106582698. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:33,977][24592] Avg episode reward: [(0, '5.002')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:34,314][626795] Updated weights for policy 0, policy_version 174122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:36,114][626795] Updated weights for policy 0, policy_version 174132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:37,805][626795] Updated weights for policy 0, policy_version 174142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:38,976][24592] Fps is (10 sec: 48332.2, 60 sec: 44357.6, 300 sec: 42959.4). Total num frames: 1426628608. Throughput: 0: 10969.3. Samples: 106653570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:38,978][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:39,483][626795] Updated weights for policy 0, policy_version 174152 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:41,211][626795] Updated weights for policy 0, policy_version 174162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:42,951][626795] Updated weights for policy 0, policy_version 174172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:43,975][24592] Fps is (10 sec: 48334.1, 60 sec: 44509.9, 300 sec: 42959.4). Total num frames: 1426866176. Throughput: 0: 10966.6. Samples: 106689042. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:43,977][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:44,616][626795] Updated weights for policy 0, policy_version 174182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:46,410][626795] Updated weights for policy 0, policy_version 174192 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:49,572][24592] Fps is (10 sec: 34016.3, 60 sec: 42449.4, 300 sec: 42512.5). Total num frames: 1426989056. Throughput: 0: 10041.0. Samples: 106724994. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:49,573][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:51,223][626795] Updated weights for policy 0, policy_version 174202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:53,034][626795] Updated weights for policy 0, policy_version 174212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:53,975][24592] Fps is (10 sec: 31948.4, 60 sec: 42325.3, 300 sec: 42459.6). Total num frames: 1427185664. Throughput: 0: 10144.3. Samples: 106793832. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:53,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:54,917][626795] Updated weights for policy 0, policy_version 174222 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:56,719][626795] Updated weights for policy 0, policy_version 174232 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:06:58,413][626795] Updated weights for policy 0, policy_version 174242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:58,975][24592] Fps is (10 sec: 44429.3, 60 sec: 42052.3, 300 sec: 42431.8). Total num frames: 1427406848. Throughput: 0: 10995.8. Samples: 106827306. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:06:58,978][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:00,309][626795] Updated weights for policy 0, policy_version 174252 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:02,010][626795] Updated weights for policy 0, policy_version 174262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:03,676][626795] Updated weights for policy 0, policy_version 174272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:03,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42052.3, 300 sec: 42862.0). Total num frames: 1427644416. Throughput: 0: 10890.4. Samples: 106897194. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:03,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000174273_1427644416.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:04,075][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000173025_1417420800.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:05,456][626795] Updated weights for policy 0, policy_version 174282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:07,221][626795] Updated weights for policy 0, policy_version 174292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:08,835][626795] Updated weights for policy 0, policy_version 174302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:08,976][24592] Fps is (10 sec: 47512.0, 60 sec: 42052.3, 300 sec: 42903.9). Total num frames: 1427881984. Throughput: 0: 10966.6. Samples: 106968150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:08,976][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:10,638][626795] Updated weights for policy 0, policy_version 174312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:12,282][626795] Updated weights for policy 0, policy_version 174322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:13,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44288.9, 300 sec: 42931.6). Total num frames: 1428119552. Throughput: 0: 10973.0. Samples: 107004042. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:13,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:14,067][626795] Updated weights for policy 0, policy_version 174332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:15,724][626795] Updated weights for policy 0, policy_version 174342 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:17,572][626795] Updated weights for policy 0, policy_version 174352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:18,975][24592] Fps is (10 sec: 46696.3, 60 sec: 44509.9, 300 sec: 42903.9). Total num frames: 1428348928. Throughput: 0: 10935.8. Samples: 107074806. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:18,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:19,216][626795] Updated weights for policy 0, policy_version 174362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:21,054][626795] Updated weights for policy 0, policy_version 174372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:24,569][24592] Fps is (10 sec: 34796.9, 60 sec: 42586.4, 300 sec: 42512.8). Total num frames: 1428488192. Throughput: 0: 10025.6. Samples: 107110674. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:24,571][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:25,590][626795] Updated weights for policy 0, policy_version 174382 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:27,393][626795] Updated weights for policy 0, policy_version 174392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:28,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42325.4, 300 sec: 42489.9). Total num frames: 1428684800. Throughput: 0: 10170.4. Samples: 107146710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:28,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:29,329][626795] Updated weights for policy 0, policy_version 174402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:31,208][626795] Updated weights for policy 0, policy_version 174412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:32,819][626795] Updated weights for policy 0, policy_version 174422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:33,975][24592] Fps is (10 sec: 44417.8, 60 sec: 42052.4, 300 sec: 42431.8). Total num frames: 1428905984. Throughput: 0: 11017.4. Samples: 107214204. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:33,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:34,682][626795] Updated weights for policy 0, policy_version 174432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:36,560][626795] Updated weights for policy 0, policy_version 174442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:38,188][626795] Updated weights for policy 0, policy_version 174452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:38,976][24592] Fps is (10 sec: 45874.4, 60 sec: 41915.8, 300 sec: 42868.1). Total num frames: 1429143552. Throughput: 0: 10868.7. Samples: 107282922. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:38,978][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:40,031][626795] Updated weights for policy 0, policy_version 174462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:41,847][626795] Updated weights for policy 0, policy_version 174472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:43,501][626795] Updated weights for policy 0, policy_version 174482 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:43,975][24592] Fps is (10 sec: 46694.0, 60 sec: 41779.1, 300 sec: 42876.1). Total num frames: 1429372928. Throughput: 0: 10890.9. Samples: 107317398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:43,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:45,146][626795] Updated weights for policy 0, policy_version 174492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:46,879][626795] Updated weights for policy 0, policy_version 174502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:48,651][626795] Updated weights for policy 0, policy_version 174512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:48,975][24592] Fps is (10 sec: 47514.1, 60 sec: 44267.3, 300 sec: 42959.4). Total num frames: 1429618688. Throughput: 0: 10934.9. Samples: 107389266. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:48,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:50,321][626795] Updated weights for policy 0, policy_version 174522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:52,220][626795] Updated weights for policy 0, policy_version 174532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:53,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44236.8, 300 sec: 42903.9). Total num frames: 1429839872. Throughput: 0: 10909.7. Samples: 107459082. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:53,976][24592] Avg episode reward: [(0, '4.899')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:53,988][626795] Updated weights for policy 0, policy_version 174542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:55,680][626795] Updated weights for policy 0, policy_version 174552 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:59,296][24592] Fps is (10 sec: 34926.0, 60 sec: 42643.8, 300 sec: 42524.5). Total num frames: 1429979136. Throughput: 0: 10823.9. Samples: 107494584. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:07:59,296][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:07:59,989][626795] Updated weights for policy 0, policy_version 174562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:01,754][626795] Updated weights for policy 0, policy_version 174572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:03,530][626795] Updated weights for policy 0, policy_version 174582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:03,976][24592] Fps is (10 sec: 35224.9, 60 sec: 42461.7, 300 sec: 42487.3). Total num frames: 1430192128. Throughput: 0: 10221.0. Samples: 107534754. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:03,979][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:05,235][626795] Updated weights for policy 0, policy_version 174592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:07,077][626795] Updated weights for policy 0, policy_version 174602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:08,886][626795] Updated weights for policy 0, policy_version 174612 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:08,975][24592] Fps is (10 sec: 45700.8, 60 sec: 42325.6, 300 sec: 42487.3). Total num frames: 1430421504. Throughput: 0: 11105.7. Samples: 107603832. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:08,976][24592] Avg episode reward: [(0, '4.994')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:10,452][626795] Updated weights for policy 0, policy_version 174622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:12,142][626795] Updated weights for policy 0, policy_version 174632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:13,976][24592] Fps is (10 sec: 46691.3, 60 sec: 42324.7, 300 sec: 42948.0). Total num frames: 1430659072. Throughput: 0: 10959.6. Samples: 107639904. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:13,979][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:14,014][626795] Updated weights for policy 0, policy_version 174642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:15,697][626795] Updated weights for policy 0, policy_version 174652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:17,382][626795] Updated weights for policy 0, policy_version 174662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:18,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42598.4, 300 sec: 42987.2). Total num frames: 1430904832. Throughput: 0: 11044.8. Samples: 107711220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:18,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:19,182][626795] Updated weights for policy 0, policy_version 174672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:20,862][626795] Updated weights for policy 0, policy_version 174682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:22,492][626795] Updated weights for policy 0, policy_version 174692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:23,976][24592] Fps is (10 sec: 47516.2, 60 sec: 44541.0, 300 sec: 43014.9). Total num frames: 1431134208. Throughput: 0: 11103.4. Samples: 107782578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:23,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:24,348][626795] Updated weights for policy 0, policy_version 174702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:26,158][626795] Updated weights for policy 0, policy_version 174712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:27,794][626795] Updated weights for policy 0, policy_version 174722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44782.9, 300 sec: 42987.2). Total num frames: 1431371776. Throughput: 0: 11105.6. Samples: 107817150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:28,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:29,656][626795] Updated weights for policy 0, policy_version 174732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:31,522][626795] Updated weights for policy 0, policy_version 174742 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:34,437][24592] Fps is (10 sec: 33671.1, 60 sec: 42679.4, 300 sec: 42504.1). Total num frames: 1431486464. Throughput: 0: 10170.6. Samples: 107851644. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:34,438][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:36,308][626795] Updated weights for policy 0, policy_version 174752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:38,186][626795] Updated weights for policy 0, policy_version 174762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:38,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42325.4, 300 sec: 42459.6). Total num frames: 1431683072. Throughput: 0: 10230.1. Samples: 107919438. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:38,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:39,970][626795] Updated weights for policy 0, policy_version 174772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:41,760][626795] Updated weights for policy 0, policy_version 174782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:43,526][626795] Updated weights for policy 0, policy_version 174792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:43,975][24592] Fps is (10 sec: 45520.6, 60 sec: 42461.9, 300 sec: 42487.3). Total num frames: 1431920640. Throughput: 0: 10274.3. Samples: 107953638. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:43,977][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:45,110][626795] Updated weights for policy 0, policy_version 174802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:46,986][626795] Updated weights for policy 0, policy_version 174812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:48,732][626795] Updated weights for policy 0, policy_version 174822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:48,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42188.8, 300 sec: 42907.3). Total num frames: 1432150016. Throughput: 0: 10869.0. Samples: 108023856. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:48,976][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:50,368][626795] Updated weights for policy 0, policy_version 174832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:52,147][626795] Updated weights for policy 0, policy_version 174842 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:53,873][626795] Updated weights for policy 0, policy_version 174852 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:53,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42598.4, 300 sec: 43015.0). Total num frames: 1432395776. Throughput: 0: 10926.5. Samples: 108095526. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:53,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:55,581][626795] Updated weights for policy 0, policy_version 174862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:57,297][626795] Updated weights for policy 0, policy_version 174872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:58,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44337.0, 300 sec: 43014.9). Total num frames: 1432625152. Throughput: 0: 10917.0. Samples: 108131160. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:08:58,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:08:59,080][626795] Updated weights for policy 0, policy_version 174882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:00,779][626795] Updated weights for policy 0, policy_version 174892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:02,513][626795] Updated weights for policy 0, policy_version 174902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:03,976][24592] Fps is (10 sec: 46692.1, 60 sec: 44509.7, 300 sec: 43014.9). Total num frames: 1432862720. Throughput: 0: 10887.9. Samples: 108201180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:03,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000174910_1432862720.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:04,056][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000173644_1422491648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:04,329][626795] Updated weights for policy 0, policy_version 174912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:06,040][626795] Updated weights for policy 0, policy_version 174922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:09,409][24592] Fps is (10 sec: 34545.2, 60 sec: 42428.0, 300 sec: 42563.6). Total num frames: 1432985600. Throughput: 0: 9983.2. Samples: 108236154. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:09,411][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:10,636][626795] Updated weights for policy 0, policy_version 174932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:12,538][626795] Updated weights for policy 0, policy_version 174942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:13,975][24592] Fps is (10 sec: 32769.4, 60 sec: 42189.4, 300 sec: 42487.3). Total num frames: 1433190400. Throughput: 0: 10118.4. Samples: 108272478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:13,976][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:14,285][626795] Updated weights for policy 0, policy_version 174952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:16,176][626795] Updated weights for policy 0, policy_version 174962 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:17,913][626795] Updated weights for policy 0, policy_version 174972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:18,976][24592] Fps is (10 sec: 45382.3, 60 sec: 41914.9, 300 sec: 42514.9). Total num frames: 1433419776. Throughput: 0: 10983.7. Samples: 108340848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:18,978][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:19,721][626795] Updated weights for policy 0, policy_version 174982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:21,404][626795] Updated weights for policy 0, policy_version 174992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:23,161][626795] Updated weights for policy 0, policy_version 175002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:23,975][24592] Fps is (10 sec: 45875.4, 60 sec: 41916.0, 300 sec: 42913.1). Total num frames: 1433649152. Throughput: 0: 10906.7. Samples: 108410238. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:23,977][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:24,946][626795] Updated weights for policy 0, policy_version 175012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:26,663][626795] Updated weights for policy 0, policy_version 175022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:28,360][626795] Updated weights for policy 0, policy_version 175032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:28,975][24592] Fps is (10 sec: 46699.6, 60 sec: 41915.7, 300 sec: 42987.2). Total num frames: 1433886720. Throughput: 0: 10932.7. Samples: 108445608. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:28,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:30,089][626795] Updated weights for policy 0, policy_version 175042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:31,804][626795] Updated weights for policy 0, policy_version 175052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:33,595][626795] Updated weights for policy 0, policy_version 175062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:33,976][24592] Fps is (10 sec: 46691.8, 60 sec: 44166.9, 300 sec: 42987.1). Total num frames: 1434116096. Throughput: 0: 10941.1. Samples: 108516210. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:33,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:35,451][626795] Updated weights for policy 0, policy_version 175072 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:37,047][626795] Updated weights for policy 0, policy_version 175082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:38,812][626795] Updated weights for policy 0, policy_version 175092 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:38,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44509.9, 300 sec: 42987.2). Total num frames: 1434353664. Throughput: 0: 10919.9. Samples: 108586920. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:38,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:40,642][626795] Updated weights for policy 0, policy_version 175102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:44,415][24592] Fps is (10 sec: 34529.1, 60 sec: 42288.6, 300 sec: 42535.0). Total num frames: 1434476544. Throughput: 0: 10779.5. Samples: 108620976. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:44,416][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:45,228][626795] Updated weights for policy 0, policy_version 175112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:47,039][626795] Updated weights for policy 0, policy_version 175122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:48,898][626795] Updated weights for policy 0, policy_version 175132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:48,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42188.8, 300 sec: 42515.1). Total num frames: 1434681344. Throughput: 0: 10132.4. Samples: 108657132. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:48,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:50,671][626795] Updated weights for policy 0, policy_version 175142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:52,427][626795] Updated weights for policy 0, policy_version 175152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:53,976][24592] Fps is (10 sec: 46269.0, 60 sec: 42052.0, 300 sec: 42515.0). Total num frames: 1434918912. Throughput: 0: 11003.4. Samples: 108726534. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:53,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:54,129][626795] Updated weights for policy 0, policy_version 175162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:55,864][626795] Updated weights for policy 0, policy_version 175172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:57,654][626795] Updated weights for policy 0, policy_version 175182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:58,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42052.2, 300 sec: 42972.8). Total num frames: 1435148288. Throughput: 0: 10883.7. Samples: 108762246. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:09:58,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:09:59,402][626795] Updated weights for policy 0, policy_version 175192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:01,060][626795] Updated weights for policy 0, policy_version 175202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:02,798][626795] Updated weights for policy 0, policy_version 175212 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:03,975][24592] Fps is (10 sec: 46696.0, 60 sec: 42052.6, 300 sec: 43014.9). Total num frames: 1435385856. Throughput: 0: 10939.1. Samples: 108833094. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:03,976][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:04,542][626795] Updated weights for policy 0, policy_version 175222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:06,302][626795] Updated weights for policy 0, policy_version 175232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:07,905][626795] Updated weights for policy 0, policy_version 175242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:08,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44284.1, 300 sec: 43042.7). Total num frames: 1435623424. Throughput: 0: 10973.5. Samples: 108904044. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:08,977][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:09,728][626795] Updated weights for policy 0, policy_version 175252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:11,603][626795] Updated weights for policy 0, policy_version 175262 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:13,423][626795] Updated weights for policy 0, policy_version 175272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:13,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44373.4, 300 sec: 43014.9). Total num frames: 1435852800. Throughput: 0: 10939.6. Samples: 108937890. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:13,976][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:15,166][626795] Updated weights for policy 0, policy_version 175282 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:19,438][24592] Fps is (10 sec: 33669.3, 60 sec: 42273.5, 300 sec: 42587.3). Total num frames: 1435975680. Throughput: 0: 10017.8. Samples: 108971634. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:19,438][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:19,884][626795] Updated weights for policy 0, policy_version 175292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:21,763][626795] Updated weights for policy 0, policy_version 175302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:23,455][626795] Updated weights for policy 0, policy_version 175312 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:23,981][24592] Fps is (10 sec: 31930.6, 60 sec: 42048.3, 300 sec: 42486.5). Total num frames: 1436172288. Throughput: 0: 10100.3. Samples: 109041492. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:23,982][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:25,345][626795] Updated weights for policy 0, policy_version 175322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:26,952][626795] Updated weights for policy 0, policy_version 175332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:28,756][626795] Updated weights for policy 0, policy_version 175342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:28,975][24592] Fps is (10 sec: 45521.9, 60 sec: 42052.3, 300 sec: 42515.1). Total num frames: 1436409856. Throughput: 0: 10206.0. Samples: 109075758. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:28,979][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:30,474][626795] Updated weights for policy 0, policy_version 175352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:32,205][626795] Updated weights for policy 0, policy_version 175362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:33,950][626795] Updated weights for policy 0, policy_version 175372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:33,976][24592] Fps is (10 sec: 47539.0, 60 sec: 42188.9, 300 sec: 42982.3). Total num frames: 1436647424. Throughput: 0: 10885.1. Samples: 109146966. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:33,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:35,666][626795] Updated weights for policy 0, policy_version 175382 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:37,448][626795] Updated weights for policy 0, policy_version 175392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:38,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42188.8, 300 sec: 43014.9). Total num frames: 1436884992. Throughput: 0: 10929.4. Samples: 109218354. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:38,977][24592] Avg episode reward: [(0, '4.929')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:39,109][626795] Updated weights for policy 0, policy_version 175402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:40,742][626795] Updated weights for policy 0, policy_version 175412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:42,571][626795] Updated weights for policy 0, policy_version 175422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:43,975][24592] Fps is (10 sec: 47514.9, 60 sec: 44425.6, 300 sec: 43070.5). Total num frames: 1437122560. Throughput: 0: 10928.0. Samples: 109254006. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:43,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:44,444][626795] Updated weights for policy 0, policy_version 175432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:46,143][626795] Updated weights for policy 0, policy_version 175442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:47,976][626795] Updated weights for policy 0, policy_version 175452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:48,975][24592] Fps is (10 sec: 45056.3, 60 sec: 44236.8, 300 sec: 43015.0). Total num frames: 1437335552. Throughput: 0: 10875.2. Samples: 109322478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:48,977][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:49,954][626795] Updated weights for policy 0, policy_version 175462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:54,438][24592] Fps is (10 sec: 32101.4, 60 sec: 42001.5, 300 sec: 42559.4). Total num frames: 1437458432. Throughput: 0: 9909.4. Samples: 109354554. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:54,440][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:54,571][626795] Updated weights for policy 0, policy_version 175472 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:56,469][626795] Updated weights for policy 0, policy_version 175482 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:10:58,320][626795] Updated weights for policy 0, policy_version 175492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:58,976][24592] Fps is (10 sec: 31947.7, 60 sec: 41779.0, 300 sec: 42487.3). Total num frames: 1437655040. Throughput: 0: 10052.7. Samples: 109390266. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:10:58,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:00,208][626795] Updated weights for policy 0, policy_version 175502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:01,923][626795] Updated weights for policy 0, policy_version 175512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:03,710][626795] Updated weights for policy 0, policy_version 175522 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:03,975][24592] Fps is (10 sec: 44666.1, 60 sec: 41642.6, 300 sec: 42459.6). Total num frames: 1437884416. Throughput: 0: 10910.3. Samples: 109457556. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:03,976][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000175523_1437884416.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:04,034][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000174273_1427644416.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:05,430][626795] Updated weights for policy 0, policy_version 175532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:07,164][626795] Updated weights for policy 0, policy_version 175542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:08,924][626795] Updated weights for policy 0, policy_version 175552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:08,975][24592] Fps is (10 sec: 46695.9, 60 sec: 41642.7, 300 sec: 42912.4). Total num frames: 1438121984. Throughput: 0: 10819.1. Samples: 109528290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:08,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:10,721][626795] Updated weights for policy 0, policy_version 175562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:12,268][626795] Updated weights for policy 0, policy_version 175572 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 41779.2, 300 sec: 42987.2). Total num frames: 1438359552. Throughput: 0: 10845.6. Samples: 109563810. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:13,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:14,104][626795] Updated weights for policy 0, policy_version 175582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:15,774][626795] Updated weights for policy 0, policy_version 175592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:17,573][626795] Updated weights for policy 0, policy_version 175602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:18,976][24592] Fps is (10 sec: 47512.6, 60 sec: 44029.7, 300 sec: 43014.9). Total num frames: 1438597120. Throughput: 0: 10841.4. Samples: 109634826. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:18,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:19,385][626795] Updated weights for policy 0, policy_version 175612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:21,158][626795] Updated weights for policy 0, policy_version 175622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:22,889][626795] Updated weights for policy 0, policy_version 175632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:23,976][24592] Fps is (10 sec: 45873.2, 60 sec: 44104.1, 300 sec: 42959.3). Total num frames: 1438818304. Throughput: 0: 10775.8. Samples: 109703268. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:23,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:24,669][626795] Updated weights for policy 0, policy_version 175642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:25,959][626772] Signal inference workers to stop experience collection... (1550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:25,961][626772] Signal inference workers to resume experience collection... (1550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:25,982][626795] InferenceWorker_p0-w0: stopping experience collection (1550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:25,985][626795] InferenceWorker_p0-w0: resuming experience collection (1550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:26,598][626795] Updated weights for policy 0, policy_version 175652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:29,442][24592] Fps is (10 sec: 32873.3, 60 sec: 41863.2, 300 sec: 42503.4). Total num frames: 1438941184. Throughput: 0: 10617.5. Samples: 109736748. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:29,443][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:31,154][626795] Updated weights for policy 0, policy_version 175662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:33,102][626795] Updated weights for policy 0, policy_version 175672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:33,975][24592] Fps is (10 sec: 31950.2, 60 sec: 41506.4, 300 sec: 42404.0). Total num frames: 1439137792. Throughput: 0: 10000.9. Samples: 109772520. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:33,977][24592] Avg episode reward: [(0, '4.971')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:34,841][626795] Updated weights for policy 0, policy_version 175682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:36,571][626795] Updated weights for policy 0, policy_version 175692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:38,341][626795] Updated weights for policy 0, policy_version 175702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:38,975][24592] Fps is (10 sec: 45542.6, 60 sec: 41506.2, 300 sec: 42404.0). Total num frames: 1439375360. Throughput: 0: 10941.8. Samples: 109841868. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:38,976][24592] Avg episode reward: [(0, '4.930')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:40,142][626795] Updated weights for policy 0, policy_version 175712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:41,770][626795] Updated weights for policy 0, policy_version 175722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:43,509][626795] Updated weights for policy 0, policy_version 175732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 41506.2, 300 sec: 42879.5). Total num frames: 1439612928. Throughput: 0: 10825.8. Samples: 109877424. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:43,977][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:45,262][626795] Updated weights for policy 0, policy_version 175742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:47,042][626795] Updated weights for policy 0, policy_version 175752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:48,723][626795] Updated weights for policy 0, policy_version 175762 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:48,976][24592] Fps is (10 sec: 47512.7, 60 sec: 41915.6, 300 sec: 42931.6). Total num frames: 1439850496. Throughput: 0: 10909.8. Samples: 109948500. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:48,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:50,476][626795] Updated weights for policy 0, policy_version 175772 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:52,296][626795] Updated weights for policy 0, policy_version 175782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:53,948][626795] Updated weights for policy 0, policy_version 175792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:53,976][24592] Fps is (10 sec: 47510.9, 60 sec: 44167.5, 300 sec: 42987.1). Total num frames: 1440088064. Throughput: 0: 10897.7. Samples: 110018694. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:53,978][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:55,687][626795] Updated weights for policy 0, policy_version 175802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:57,502][626795] Updated weights for policy 0, policy_version 175812 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:58,975][24592] Fps is (10 sec: 46695.2, 60 sec: 44373.6, 300 sec: 42959.4). Total num frames: 1440317440. Throughput: 0: 10888.8. Samples: 110053806. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:11:58,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:11:59,398][626795] Updated weights for policy 0, policy_version 175822 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:01,080][626795] Updated weights for policy 0, policy_version 175832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:04,446][24592] Fps is (10 sec: 32861.6, 60 sec: 42131.4, 300 sec: 42475.1). Total num frames: 1440432128. Throughput: 0: 9960.7. Samples: 110087742. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:04,447][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:05,836][626795] Updated weights for policy 0, policy_version 175842 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:07,609][626795] Updated weights for policy 0, policy_version 175852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:08,975][24592] Fps is (10 sec: 31948.7, 60 sec: 41915.7, 300 sec: 42431.8). Total num frames: 1440636928. Throughput: 0: 10080.8. Samples: 110156898. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:08,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:09,440][626795] Updated weights for policy 0, policy_version 175862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:11,201][626795] Updated weights for policy 0, policy_version 175872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:12,960][626795] Updated weights for policy 0, policy_version 175882 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:13,975][24592] Fps is (10 sec: 45562.2, 60 sec: 41779.2, 300 sec: 42431.8). Total num frames: 1440866304. Throughput: 0: 10216.6. Samples: 110191728. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:13,977][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:14,674][626795] Updated weights for policy 0, policy_version 175892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:16,413][626795] Updated weights for policy 0, policy_version 175902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:18,067][626795] Updated weights for policy 0, policy_version 175912 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:18,975][24592] Fps is (10 sec: 46694.5, 60 sec: 41779.3, 300 sec: 42851.3). Total num frames: 1441103872. Throughput: 0: 10894.0. Samples: 110262750. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:18,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:19,869][626795] Updated weights for policy 0, policy_version 175922 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:21,620][626795] Updated weights for policy 0, policy_version 175932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:23,233][626795] Updated weights for policy 0, policy_version 175942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:23,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42189.1, 300 sec: 42931.6). Total num frames: 1441349632. Throughput: 0: 10947.3. Samples: 110334498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:23,976][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:24,942][626795] Updated weights for policy 0, policy_version 175952 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:26,706][626795] Updated weights for policy 0, policy_version 175962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:28,541][626795] Updated weights for policy 0, policy_version 175972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44308.3, 300 sec: 42959.4). Total num frames: 1441579008. Throughput: 0: 10940.5. Samples: 110369748. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:28,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:30,309][626795] Updated weights for policy 0, policy_version 175982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:32,018][626795] Updated weights for policy 0, policy_version 175992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:33,900][626795] Updated weights for policy 0, policy_version 176002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:33,976][24592] Fps is (10 sec: 45873.1, 60 sec: 44509.5, 300 sec: 42931.6). Total num frames: 1441808384. Throughput: 0: 10897.4. Samples: 110438886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:33,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:35,735][626795] Updated weights for policy 0, policy_version 176012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:39,454][24592] Fps is (10 sec: 32834.0, 60 sec: 42125.6, 300 sec: 42473.9). Total num frames: 1441923072. Throughput: 0: 9977.2. Samples: 110472438. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:39,455][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:40,453][626795] Updated weights for policy 0, policy_version 176022 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:42,298][626795] Updated weights for policy 0, policy_version 176032 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:43,975][24592] Fps is (10 sec: 31949.8, 60 sec: 41915.7, 300 sec: 42404.0). Total num frames: 1442127872. Throughput: 0: 10090.6. Samples: 110507886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:43,978][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:44,167][626795] Updated weights for policy 0, policy_version 176042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:45,850][626795] Updated weights for policy 0, policy_version 176052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:47,578][626795] Updated weights for policy 0, policy_version 176062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:48,975][24592] Fps is (10 sec: 45601.3, 60 sec: 41779.3, 300 sec: 42431.8). Total num frames: 1442357248. Throughput: 0: 10978.4. Samples: 110576604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:48,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:49,330][626795] Updated weights for policy 0, policy_version 176072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:51,086][626795] Updated weights for policy 0, policy_version 176082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:52,841][626795] Updated weights for policy 0, policy_version 176092 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:53,975][24592] Fps is (10 sec: 46695.1, 60 sec: 41779.6, 300 sec: 42811.5). Total num frames: 1442594816. Throughput: 0: 10894.9. Samples: 110647170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:53,976][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:54,505][626795] Updated weights for policy 0, policy_version 176102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:56,271][626795] Updated weights for policy 0, policy_version 176112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:57,961][626795] Updated weights for policy 0, policy_version 176122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:58,975][24592] Fps is (10 sec: 47513.6, 60 sec: 41915.7, 300 sec: 42848.4). Total num frames: 1442832384. Throughput: 0: 10913.6. Samples: 110682840. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:12:58,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:12:59,768][626795] Updated weights for policy 0, policy_version 176132 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:01,400][626795] Updated weights for policy 0, policy_version 176142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:03,175][626795] Updated weights for policy 0, policy_version 176152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:03,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44311.3, 300 sec: 42876.1). Total num frames: 1443069952. Throughput: 0: 10905.9. Samples: 110753514. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:03,976][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000176156_1443069952.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:04,031][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000174910_1432862720.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:05,030][626795] Updated weights for policy 0, policy_version 176162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:06,794][626795] Updated weights for policy 0, policy_version 176172 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:08,550][626795] Updated weights for policy 0, policy_version 176182 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:08,976][24592] Fps is (10 sec: 46692.8, 60 sec: 44373.1, 300 sec: 42848.4). Total num frames: 1443299328. Throughput: 0: 10841.4. Samples: 110822364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:08,978][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:10,354][626795] Updated weights for policy 0, policy_version 176192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:14,426][24592] Fps is (10 sec: 33707.2, 60 sec: 42280.9, 300 sec: 42367.1). Total num frames: 1443422208. Throughput: 0: 10687.7. Samples: 110855508. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:14,427][24592] Avg episode reward: [(0, '4.276')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:15,070][626795] Updated weights for policy 0, policy_version 176202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:17,029][626795] Updated weights for policy 0, policy_version 176212 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:18,687][626795] Updated weights for policy 0, policy_version 176222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:18,975][24592] Fps is (10 sec: 31949.9, 60 sec: 41915.7, 300 sec: 42320.8). Total num frames: 1443618816. Throughput: 0: 10033.8. Samples: 110890404. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:18,976][24592] Avg episode reward: [(0, '4.379')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:20,509][626795] Updated weights for policy 0, policy_version 176232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:22,243][626795] Updated weights for policy 0, policy_version 176242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:23,932][626795] Updated weights for policy 0, policy_version 176252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:23,975][24592] Fps is (10 sec: 45466.0, 60 sec: 41779.2, 300 sec: 42320.7). Total num frames: 1443856384. Throughput: 0: 10975.5. Samples: 110961078. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:23,976][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:25,618][626795] Updated weights for policy 0, policy_version 176262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:27,436][626795] Updated weights for policy 0, policy_version 176272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:28,975][24592] Fps is (10 sec: 46694.2, 60 sec: 41779.2, 300 sec: 42776.5). Total num frames: 1444085760. Throughput: 0: 10854.2. Samples: 110996322. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:28,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:29,153][626795] Updated weights for policy 0, policy_version 176282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:30,954][626795] Updated weights for policy 0, policy_version 176292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:32,581][626795] Updated weights for policy 0, policy_version 176302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:33,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42052.6, 300 sec: 42876.1). Total num frames: 1444331520. Throughput: 0: 10913.7. Samples: 111067722. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:33,978][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:34,352][626795] Updated weights for policy 0, policy_version 176312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:36,007][626795] Updated weights for policy 0, policy_version 176322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:37,775][626795] Updated weights for policy 0, policy_version 176332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:38,976][24592] Fps is (10 sec: 48328.3, 60 sec: 44454.3, 300 sec: 42876.0). Total num frames: 1444569088. Throughput: 0: 10928.0. Samples: 111138942. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:38,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:39,543][626795] Updated weights for policy 0, policy_version 176342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:41,160][626795] Updated weights for policy 0, policy_version 176352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:42,999][626795] Updated weights for policy 0, policy_version 176362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:43,976][24592] Fps is (10 sec: 46691.6, 60 sec: 44509.5, 300 sec: 42876.0). Total num frames: 1444798464. Throughput: 0: 10909.5. Samples: 111173772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:43,978][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:44,865][626795] Updated weights for policy 0, policy_version 176372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:49,484][24592] Fps is (10 sec: 32743.6, 60 sec: 42240.2, 300 sec: 42358.7). Total num frames: 1444913152. Throughput: 0: 10008.2. Samples: 111208974. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:49,485][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:49,524][626795] Updated weights for policy 0, policy_version 176382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:51,430][626795] Updated weights for policy 0, policy_version 176392 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:53,376][626795] Updated weights for policy 0, policy_version 176402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:53,976][24592] Fps is (10 sec: 30311.2, 60 sec: 41778.9, 300 sec: 42292.9). Total num frames: 1445101568. Throughput: 0: 10054.1. Samples: 111274800. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:53,980][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:55,263][626795] Updated weights for policy 0, policy_version 176412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:56,915][626795] Updated weights for policy 0, policy_version 176422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:13:58,623][626795] Updated weights for policy 0, policy_version 176432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:58,975][24592] Fps is (10 sec: 44882.0, 60 sec: 41779.2, 300 sec: 42293.0). Total num frames: 1445339136. Throughput: 0: 10196.1. Samples: 111309738. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:13:58,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:00,380][626795] Updated weights for policy 0, policy_version 176442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:02,123][626795] Updated weights for policy 0, policy_version 176452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:03,847][626795] Updated weights for policy 0, policy_version 176462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:03,975][24592] Fps is (10 sec: 48334.5, 60 sec: 41915.8, 300 sec: 42772.4). Total num frames: 1445584896. Throughput: 0: 10900.3. Samples: 111380916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:03,976][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:05,607][626795] Updated weights for policy 0, policy_version 176472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:07,221][626795] Updated weights for policy 0, policy_version 176482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:08,976][24592] Fps is (10 sec: 47512.8, 60 sec: 41915.9, 300 sec: 42792.8). Total num frames: 1445814272. Throughput: 0: 10906.5. Samples: 111451872. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:08,978][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:09,031][626795] Updated weights for policy 0, policy_version 176492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:10,718][626795] Updated weights for policy 0, policy_version 176502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:12,426][626795] Updated weights for policy 0, policy_version 176512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:13,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44158.7, 300 sec: 42820.7). Total num frames: 1446051840. Throughput: 0: 10922.9. Samples: 111487854. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:13,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:14,224][626795] Updated weights for policy 0, policy_version 176522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:15,958][626795] Updated weights for policy 0, policy_version 176532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:17,605][626795] Updated weights for policy 0, policy_version 176542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:18,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44646.3, 300 sec: 42876.1). Total num frames: 1446297600. Throughput: 0: 10905.6. Samples: 111558474. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:18,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:19,464][626795] Updated weights for policy 0, policy_version 176552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:21,374][626795] Updated weights for policy 0, policy_version 176562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:24,440][24592] Fps is (10 sec: 33663.2, 60 sec: 42135.9, 300 sec: 42365.1). Total num frames: 1446404096. Throughput: 0: 9963.8. Samples: 111591930. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:24,442][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:26,038][626795] Updated weights for policy 0, policy_version 176572 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:27,988][626795] Updated weights for policy 0, policy_version 176582 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:28,976][24592] Fps is (10 sec: 29490.5, 60 sec: 41779.0, 300 sec: 42293.0). Total num frames: 1446592512. Throughput: 0: 10050.1. Samples: 111626022. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:28,979][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:29,850][626795] Updated weights for policy 0, policy_version 176592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:31,583][626795] Updated weights for policy 0, policy_version 176602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:33,269][626795] Updated weights for policy 0, policy_version 176612 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:33,975][24592] Fps is (10 sec: 45531.0, 60 sec: 41779.2, 300 sec: 42320.7). Total num frames: 1446838272. Throughput: 0: 10906.8. Samples: 111694230. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:33,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:35,028][626795] Updated weights for policy 0, policy_version 176622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:36,799][626795] Updated weights for policy 0, policy_version 176632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:38,437][626795] Updated weights for policy 0, policy_version 176642 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:38,975][24592] Fps is (10 sec: 48334.6, 60 sec: 41779.9, 300 sec: 42773.2). Total num frames: 1447075840. Throughput: 0: 10896.1. Samples: 111765120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:38,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:40,224][626795] Updated weights for policy 0, policy_version 176652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:42,024][626795] Updated weights for policy 0, policy_version 176662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:43,598][626795] Updated weights for policy 0, policy_version 176672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41779.6, 300 sec: 42792.8). Total num frames: 1447305216. Throughput: 0: 10912.7. Samples: 111800808. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:43,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:45,375][626795] Updated weights for policy 0, policy_version 176682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:47,250][626795] Updated weights for policy 0, policy_version 176692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:48,937][626795] Updated weights for policy 0, policy_version 176702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:48,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44202.0, 300 sec: 42792.8). Total num frames: 1447542784. Throughput: 0: 10901.5. Samples: 111871482. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:48,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:50,619][626795] Updated weights for policy 0, policy_version 176712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:52,430][626795] Updated weights for policy 0, policy_version 176722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:53,976][24592] Fps is (10 sec: 47511.5, 60 sec: 44646.3, 300 sec: 42820.5). Total num frames: 1447780352. Throughput: 0: 10895.7. Samples: 111942180. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:53,978][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:54,154][626795] Updated weights for policy 0, policy_version 176732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:14:56,115][626795] Updated weights for policy 0, policy_version 176742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:59,384][24592] Fps is (10 sec: 33844.2, 60 sec: 42310.6, 300 sec: 42345.4). Total num frames: 1447895040. Throughput: 0: 10708.1. Samples: 111974088. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:14:59,384][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:00,697][626795] Updated weights for policy 0, policy_version 176752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:02,696][626795] Updated weights for policy 0, policy_version 176762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:03,975][24592] Fps is (10 sec: 30311.4, 60 sec: 41642.6, 300 sec: 42237.4). Total num frames: 1448083456. Throughput: 0: 10008.7. Samples: 112008864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:03,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:04,022][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000176769_1448091648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:04,080][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000175523_1437884416.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:04,632][626795] Updated weights for policy 0, policy_version 176772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:06,447][626795] Updated weights for policy 0, policy_version 176782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:08,131][626795] Updated weights for policy 0, policy_version 176792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:08,975][24592] Fps is (10 sec: 43556.8, 60 sec: 41642.7, 300 sec: 42237.4). Total num frames: 1448312832. Throughput: 0: 10865.4. Samples: 112075830. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:08,977][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:09,920][626795] Updated weights for policy 0, policy_version 176802 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:11,776][626795] Updated weights for policy 0, policy_version 176812 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:13,526][626795] Updated weights for policy 0, policy_version 176822 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:13,975][24592] Fps is (10 sec: 45875.7, 60 sec: 41506.1, 300 sec: 42665.3). Total num frames: 1448542208. Throughput: 0: 10767.7. Samples: 112110564. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:13,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:15,302][626795] Updated weights for policy 0, policy_version 176832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:17,045][626795] Updated weights for policy 0, policy_version 176842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:18,877][626795] Updated weights for policy 0, policy_version 176852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:18,975][24592] Fps is (10 sec: 45875.4, 60 sec: 41233.1, 300 sec: 42710.3). Total num frames: 1448771584. Throughput: 0: 10777.9. Samples: 112179234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:18,976][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:20,653][626795] Updated weights for policy 0, policy_version 176862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:22,401][626795] Updated weights for policy 0, policy_version 176872 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:23,976][24592] Fps is (10 sec: 45873.5, 60 sec: 43618.2, 300 sec: 42681.7). Total num frames: 1449000960. Throughput: 0: 10743.1. Samples: 112248564. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:23,981][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:24,381][626795] Updated weights for policy 0, policy_version 176882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:26,041][626795] Updated weights for policy 0, policy_version 176892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:27,809][626795] Updated weights for policy 0, policy_version 176902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:28,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43964.0, 300 sec: 42654.0). Total num frames: 1449230336. Throughput: 0: 10696.0. Samples: 112282128. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:28,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:29,636][626795] Updated weights for policy 0, policy_version 176912 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:31,396][626795] Updated weights for policy 0, policy_version 176922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:33,976][24592] Fps is (10 sec: 36864.3, 60 sec: 42188.6, 300 sec: 42320.7). Total num frames: 1449369600. Throughput: 0: 10410.6. Samples: 112339962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:33,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:35,163][626795] Updated weights for policy 0, policy_version 176932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:36,957][626795] Updated weights for policy 0, policy_version 176942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:38,712][626795] Updated weights for policy 0, policy_version 176952 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:38,976][24592] Fps is (10 sec: 36861.5, 60 sec: 42051.8, 300 sec: 42292.8). Total num frames: 1449598976. Throughput: 0: 10115.9. Samples: 112397400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:38,978][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:40,429][626795] Updated weights for policy 0, policy_version 176962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:42,292][626795] Updated weights for policy 0, policy_version 176972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:43,975][24592] Fps is (10 sec: 45876.5, 60 sec: 42052.2, 300 sec: 42348.5). Total num frames: 1449828352. Throughput: 0: 10279.9. Samples: 112432488. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:43,978][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:43,984][626795] Updated weights for policy 0, policy_version 176982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:45,654][626795] Updated weights for policy 0, policy_version 176992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:47,353][626795] Updated weights for policy 0, policy_version 177002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:48,975][24592] Fps is (10 sec: 47516.9, 60 sec: 42188.8, 300 sec: 42832.2). Total num frames: 1450074112. Throughput: 0: 10996.2. Samples: 112503690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:48,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:49,241][626795] Updated weights for policy 0, policy_version 177012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:49,336][626772] Signal inference workers to stop experience collection... (1600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:49,337][626772] Signal inference workers to resume experience collection... (1600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:49,343][626795] InferenceWorker_p0-w0: stopping experience collection (1600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:49,348][626795] InferenceWorker_p0-w0: resuming experience collection (1600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:50,871][626795] Updated weights for policy 0, policy_version 177022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:52,606][626795] Updated weights for policy 0, policy_version 177032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:53,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42052.5, 300 sec: 42876.1). Total num frames: 1450303488. Throughput: 0: 11062.7. Samples: 112573650. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:53,976][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:54,454][626795] Updated weights for policy 0, policy_version 177042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:56,112][626795] Updated weights for policy 0, policy_version 177052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:57,868][626795] Updated weights for policy 0, policy_version 177062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:58,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44264.9, 300 sec: 42876.1). Total num frames: 1450532864. Throughput: 0: 11080.4. Samples: 112609182. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:15:58,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:15:59,685][626795] Updated weights for policy 0, policy_version 177072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:01,302][626795] Updated weights for policy 0, policy_version 177082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:03,055][626795] Updated weights for policy 0, policy_version 177092 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:03,987][24592] Fps is (10 sec: 46640.6, 60 sec: 44774.4, 300 sec: 42874.4). Total num frames: 1450770432. Throughput: 0: 11139.9. Samples: 112680660. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:03,989][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:04,893][626795] Updated weights for policy 0, policy_version 177102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:09,026][24592] Fps is (10 sec: 32601.1, 60 sec: 42425.7, 300 sec: 42368.9). Total num frames: 1450860544. Throughput: 0: 10321.7. Samples: 112713564. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:09,027][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:09,940][626795] Updated weights for policy 0, policy_version 177112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:11,661][626795] Updated weights for policy 0, policy_version 177122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:13,533][626795] Updated weights for policy 0, policy_version 177132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:13,975][24592] Fps is (10 sec: 31165.3, 60 sec: 42325.2, 300 sec: 42320.7). Total num frames: 1451081728. Throughput: 0: 10293.7. Samples: 112745346. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:13,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:15,400][626795] Updated weights for policy 0, policy_version 177142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:17,216][626795] Updated weights for policy 0, policy_version 177152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:18,927][626795] Updated weights for policy 0, policy_version 177162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:18,976][24592] Fps is (10 sec: 45286.4, 60 sec: 42325.1, 300 sec: 42348.5). Total num frames: 1451311104. Throughput: 0: 10522.1. Samples: 112813458. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:18,976][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:20,747][626795] Updated weights for policy 0, policy_version 177172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:22,525][626795] Updated weights for policy 0, policy_version 177182 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:23,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42325.5, 300 sec: 42777.1). Total num frames: 1451540480. Throughput: 0: 10773.5. Samples: 112882200. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:23,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:24,331][626795] Updated weights for policy 0, policy_version 177192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:26,136][626795] Updated weights for policy 0, policy_version 177202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:27,859][626795] Updated weights for policy 0, policy_version 177212 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:28,975][24592] Fps is (10 sec: 45876.7, 60 sec: 42325.4, 300 sec: 42820.6). Total num frames: 1451769856. Throughput: 0: 10761.9. Samples: 112916772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:28,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:29,562][626795] Updated weights for policy 0, policy_version 177222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:31,410][626795] Updated weights for policy 0, policy_version 177232 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:33,125][626795] Updated weights for policy 0, policy_version 177242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:33,975][24592] Fps is (10 sec: 45876.2, 60 sec: 43827.5, 300 sec: 42792.8). Total num frames: 1451999232. Throughput: 0: 10730.0. Samples: 112986540. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:33,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:34,923][626795] Updated weights for policy 0, policy_version 177252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:36,708][626795] Updated weights for policy 0, policy_version 177262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:38,471][626795] Updated weights for policy 0, policy_version 177272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:38,976][24592] Fps is (10 sec: 46691.9, 60 sec: 43963.9, 300 sec: 42792.7). Total num frames: 1452236800. Throughput: 0: 10724.4. Samples: 113056254. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:38,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:40,222][626795] Updated weights for policy 0, policy_version 177282 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:43,975][24592] Fps is (10 sec: 35225.4, 60 sec: 42052.3, 300 sec: 42376.3). Total num frames: 1452351488. Throughput: 0: 10603.7. Samples: 113086350. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:43,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:44,518][626795] Updated weights for policy 0, policy_version 177292 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:46,410][626795] Updated weights for policy 0, policy_version 177302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:48,224][626795] Updated weights for policy 0, policy_version 177312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:48,975][24592] Fps is (10 sec: 33588.9, 60 sec: 41642.7, 300 sec: 42320.8). Total num frames: 1452572672. Throughput: 0: 10004.3. Samples: 113130738. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:48,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:49,823][626795] Updated weights for policy 0, policy_version 177322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:51,742][626795] Updated weights for policy 0, policy_version 177332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:53,440][626795] Updated weights for policy 0, policy_version 177342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:53,975][24592] Fps is (10 sec: 45055.8, 60 sec: 41642.7, 300 sec: 42320.7). Total num frames: 1452802048. Throughput: 0: 10813.4. Samples: 113199612. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:53,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:55,342][626795] Updated weights for policy 0, policy_version 177352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:57,059][626795] Updated weights for policy 0, policy_version 177362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:16:58,771][626795] Updated weights for policy 0, policy_version 177372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:58,976][24592] Fps is (10 sec: 45873.7, 60 sec: 41642.4, 300 sec: 42777.7). Total num frames: 1453031424. Throughput: 0: 10845.2. Samples: 113233380. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:16:58,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:00,552][626795] Updated weights for policy 0, policy_version 177382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:02,280][626795] Updated weights for policy 0, policy_version 177392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:03,975][24592] Fps is (10 sec: 46694.2, 60 sec: 41650.6, 300 sec: 42820.5). Total num frames: 1453268992. Throughput: 0: 10899.8. Samples: 113303946. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:03,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000177401_1453268992.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000176156_1443069952.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:04,112][626795] Updated weights for policy 0, policy_version 177402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:05,755][626795] Updated weights for policy 0, policy_version 177412 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:07,599][626795] Updated weights for policy 0, policy_version 177422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:08,976][24592] Fps is (10 sec: 47513.6, 60 sec: 44137.7, 300 sec: 42848.3). Total num frames: 1453506560. Throughput: 0: 10921.3. Samples: 113373660. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:08,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:09,374][626795] Updated weights for policy 0, policy_version 177432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:11,072][626795] Updated weights for policy 0, policy_version 177442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:12,876][626795] Updated weights for policy 0, policy_version 177452 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:13,975][24592] Fps is (10 sec: 46695.0, 60 sec: 44236.9, 300 sec: 42820.6). Total num frames: 1453735936. Throughput: 0: 10916.5. Samples: 113408016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:13,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:14,715][626795] Updated weights for policy 0, policy_version 177462 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:18,975][24592] Fps is (10 sec: 32768.9, 60 sec: 42052.5, 300 sec: 42320.7). Total num frames: 1453834240. Throughput: 0: 10488.5. Samples: 113458524. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:18,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:19,312][626795] Updated weights for policy 0, policy_version 177472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:21,073][626795] Updated weights for policy 0, policy_version 177482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:22,844][626795] Updated weights for policy 0, policy_version 177492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:23,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42052.4, 300 sec: 42320.7). Total num frames: 1454063616. Throughput: 0: 10166.4. Samples: 113513736. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:23,976][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:24,655][626795] Updated weights for policy 0, policy_version 177502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:26,421][626795] Updated weights for policy 0, policy_version 177512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:28,124][626795] Updated weights for policy 0, policy_version 177522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:28,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42052.3, 300 sec: 42320.8). Total num frames: 1454292992. Throughput: 0: 10260.7. Samples: 113548080. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:28,978][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:29,984][626795] Updated weights for policy 0, policy_version 177532 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:31,661][626795] Updated weights for policy 0, policy_version 177542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:33,381][626795] Updated weights for policy 0, policy_version 177552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:33,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42188.8, 300 sec: 42806.7). Total num frames: 1454530560. Throughput: 0: 10822.9. Samples: 113617770. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:33,976][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:35,187][626795] Updated weights for policy 0, policy_version 177562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:37,006][626795] Updated weights for policy 0, policy_version 177572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:38,604][626795] Updated weights for policy 0, policy_version 177582 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:38,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42052.6, 300 sec: 42820.6). Total num frames: 1454759936. Throughput: 0: 10871.3. Samples: 113688822. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:38,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:40,445][626795] Updated weights for policy 0, policy_version 177592 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:42,327][626795] Updated weights for policy 0, policy_version 177602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:43,909][626795] Updated weights for policy 0, policy_version 177612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:43,975][24592] Fps is (10 sec: 46694.2, 60 sec: 44100.2, 300 sec: 42848.3). Total num frames: 1454997504. Throughput: 0: 10871.0. Samples: 113722572. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:43,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:45,700][626795] Updated weights for policy 0, policy_version 177622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:47,586][626795] Updated weights for policy 0, policy_version 177632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:48,976][24592] Fps is (10 sec: 46693.3, 60 sec: 44236.6, 300 sec: 42820.5). Total num frames: 1455226880. Throughput: 0: 10847.4. Samples: 113792082. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:48,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:49,366][626795] Updated weights for policy 0, policy_version 177642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:53,924][626795] Updated weights for policy 0, policy_version 177652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:53,976][24592] Fps is (10 sec: 32767.2, 60 sec: 42052.1, 300 sec: 42348.4). Total num frames: 1455325184. Throughput: 0: 10174.7. Samples: 113831520. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:53,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:55,530][626795] Updated weights for policy 0, policy_version 177662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:57,439][626795] Updated weights for policy 0, policy_version 177672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:58,977][24592] Fps is (10 sec: 33584.2, 60 sec: 42188.2, 300 sec: 42348.3). Total num frames: 1455562752. Throughput: 0: 10136.8. Samples: 113864184. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:17:58,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:17:59,303][626795] Updated weights for policy 0, policy_version 177682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:00,877][626795] Updated weights for policy 0, policy_version 177692 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:02,689][626795] Updated weights for policy 0, policy_version 177702 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:03,975][24592] Fps is (10 sec: 45876.1, 60 sec: 41915.7, 300 sec: 42320.7). Total num frames: 1455783936. Throughput: 0: 10552.3. Samples: 113933376. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:03,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:04,588][626795] Updated weights for policy 0, policy_version 177712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:06,210][626795] Updated weights for policy 0, policy_version 177722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:08,051][626795] Updated weights for policy 0, policy_version 177732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:08,975][24592] Fps is (10 sec: 45880.4, 60 sec: 41916.0, 300 sec: 42774.8). Total num frames: 1456021504. Throughput: 0: 10875.5. Samples: 114003132. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:08,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:09,808][626795] Updated weights for policy 0, policy_version 177742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:11,512][626795] Updated weights for policy 0, policy_version 177752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:13,188][626795] Updated weights for policy 0, policy_version 177762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:13,976][24592] Fps is (10 sec: 47513.1, 60 sec: 42052.1, 300 sec: 42848.3). Total num frames: 1456259072. Throughput: 0: 10904.2. Samples: 114038772. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:13,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:15,089][626795] Updated weights for policy 0, policy_version 177772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:16,722][626795] Updated weights for policy 0, policy_version 177782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:18,477][626795] Updated weights for policy 0, policy_version 177792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:18,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44236.8, 300 sec: 42820.5). Total num frames: 1456488448. Throughput: 0: 10904.9. Samples: 114108492. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:18,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:20,291][626795] Updated weights for policy 0, policy_version 177802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:22,065][626795] Updated weights for policy 0, policy_version 177812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:23,922][626795] Updated weights for policy 0, policy_version 177822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:23,975][24592] Fps is (10 sec: 45875.7, 60 sec: 44236.8, 300 sec: 42820.6). Total num frames: 1456717824. Throughput: 0: 10849.7. Samples: 114177060. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:23,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:25,749][626795] Updated weights for policy 0, policy_version 177832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:28,975][24592] Fps is (10 sec: 33587.4, 60 sec: 42188.8, 300 sec: 42348.5). Total num frames: 1456824320. Throughput: 0: 10704.4. Samples: 114204270. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:28,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:30,212][626795] Updated weights for policy 0, policy_version 177842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:32,057][626795] Updated weights for policy 0, policy_version 177852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:33,848][626795] Updated weights for policy 0, policy_version 177862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:33,975][24592] Fps is (10 sec: 32768.1, 60 sec: 41915.7, 300 sec: 42293.1). Total num frames: 1457045504. Throughput: 0: 10140.5. Samples: 114248400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:33,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:35,588][626795] Updated weights for policy 0, policy_version 177872 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:37,389][626795] Updated weights for policy 0, policy_version 177882 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:38,976][24592] Fps is (10 sec: 45055.1, 60 sec: 41915.6, 300 sec: 42293.0). Total num frames: 1457274880. Throughput: 0: 10796.2. Samples: 114317346. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:38,977][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:39,165][626795] Updated weights for policy 0, policy_version 177892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:40,889][626795] Updated weights for policy 0, policy_version 177902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:42,658][626795] Updated weights for policy 0, policy_version 177912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41915.8, 300 sec: 42783.3). Total num frames: 1457512448. Throughput: 0: 10837.1. Samples: 114351840. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:43,977][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:44,469][626795] Updated weights for policy 0, policy_version 177922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:46,232][626795] Updated weights for policy 0, policy_version 177932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:47,927][626795] Updated weights for policy 0, policy_version 177942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:48,975][24592] Fps is (10 sec: 46695.1, 60 sec: 41915.9, 300 sec: 42848.4). Total num frames: 1457741824. Throughput: 0: 10873.7. Samples: 114422694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:48,976][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:49,683][626795] Updated weights for policy 0, policy_version 177952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:51,482][626795] Updated weights for policy 0, policy_version 177962 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:53,294][626795] Updated weights for policy 0, policy_version 177972 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:53,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44100.5, 300 sec: 42820.6). Total num frames: 1457971200. Throughput: 0: 10853.5. Samples: 114491538. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:53,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:55,045][626795] Updated weights for policy 0, policy_version 177982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:56,838][626795] Updated weights for policy 0, policy_version 177992 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:18:58,784][626795] Updated weights for policy 0, policy_version 178002 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44101.1, 300 sec: 42792.8). Total num frames: 1458208768. Throughput: 0: 10825.4. Samples: 114525912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:18:58,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:00,361][626795] Updated weights for policy 0, policy_version 178012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:03,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42052.3, 300 sec: 42348.5). Total num frames: 1458307072. Throughput: 0: 10366.0. Samples: 114574962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:03,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000178016_1458307072.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:04,050][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000176769_1448091648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:04,988][626795] Updated weights for policy 0, policy_version 178022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:06,813][626795] Updated weights for policy 0, policy_version 178032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:08,580][626795] Updated weights for policy 0, policy_version 178042 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:08,975][24592] Fps is (10 sec: 32768.0, 60 sec: 41915.7, 300 sec: 42320.7). Total num frames: 1458536448. Throughput: 0: 10102.4. Samples: 114631668. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:08,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:10,329][626795] Updated weights for policy 0, policy_version 178052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:12,136][626795] Updated weights for policy 0, policy_version 178062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:13,837][626795] Updated weights for policy 0, policy_version 178072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 41779.3, 300 sec: 42265.2). Total num frames: 1458765824. Throughput: 0: 10253.9. Samples: 114665694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:13,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:15,678][626795] Updated weights for policy 0, policy_version 178082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:17,371][626795] Updated weights for policy 0, policy_version 178092 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:18,975][24592] Fps is (10 sec: 45874.8, 60 sec: 41779.1, 300 sec: 42749.0). Total num frames: 1458995200. Throughput: 0: 10826.2. Samples: 114735582. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:18,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:19,192][626795] Updated weights for policy 0, policy_version 178102 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:20,924][626795] Updated weights for policy 0, policy_version 178112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:22,695][626795] Updated weights for policy 0, policy_version 178122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:23,975][24592] Fps is (10 sec: 46694.3, 60 sec: 41915.8, 300 sec: 42848.4). Total num frames: 1459232768. Throughput: 0: 10851.2. Samples: 114805650. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:23,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:24,430][626795] Updated weights for policy 0, policy_version 178132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:26,238][626795] Updated weights for policy 0, policy_version 178142 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:27,995][626795] Updated weights for policy 0, policy_version 178152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:28,976][24592] Fps is (10 sec: 46694.0, 60 sec: 43963.6, 300 sec: 42792.8). Total num frames: 1459462144. Throughput: 0: 10852.0. Samples: 114840180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:28,978][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:29,871][626795] Updated weights for policy 0, policy_version 178162 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:31,568][626795] Updated weights for policy 0, policy_version 178172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:33,343][626795] Updated weights for policy 0, policy_version 178182 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:33,976][24592] Fps is (10 sec: 45872.6, 60 sec: 44099.9, 300 sec: 42764.9). Total num frames: 1459691520. Throughput: 0: 10810.4. Samples: 114909168. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:33,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:35,214][626795] Updated weights for policy 0, policy_version 178192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:38,975][24592] Fps is (10 sec: 33587.8, 60 sec: 42052.4, 300 sec: 42348.5). Total num frames: 1459798016. Throughput: 0: 10117.7. Samples: 114946836. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:38,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:39,658][626795] Updated weights for policy 0, policy_version 178202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:41,496][626795] Updated weights for policy 0, policy_version 178212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:43,263][626795] Updated weights for policy 0, policy_version 178222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:43,976][24592] Fps is (10 sec: 33586.9, 60 sec: 41915.3, 300 sec: 42320.6). Total num frames: 1460027392. Throughput: 0: 10110.0. Samples: 114980868. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:43,978][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:44,912][626795] Updated weights for policy 0, policy_version 178232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:46,783][626795] Updated weights for policy 0, policy_version 178242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:48,543][626795] Updated weights for policy 0, policy_version 178252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:48,975][24592] Fps is (10 sec: 45874.7, 60 sec: 41915.7, 300 sec: 42293.0). Total num frames: 1460256768. Throughput: 0: 10555.2. Samples: 115049946. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:48,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:50,363][626795] Updated weights for policy 0, policy_version 178262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:52,108][626795] Updated weights for policy 0, policy_version 178272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:53,776][626795] Updated weights for policy 0, policy_version 178282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:53,975][24592] Fps is (10 sec: 46697.6, 60 sec: 42052.3, 300 sec: 42768.7). Total num frames: 1460494336. Throughput: 0: 10865.7. Samples: 115120626. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:53,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:55,647][626795] Updated weights for policy 0, policy_version 178292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:57,360][626795] Updated weights for policy 0, policy_version 178302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:58,975][24592] Fps is (10 sec: 45875.7, 60 sec: 41779.2, 300 sec: 42820.6). Total num frames: 1460715520. Throughput: 0: 10873.7. Samples: 115155012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:19:58,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:19:59,082][626795] Updated weights for policy 0, policy_version 178312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:00,941][626795] Updated weights for policy 0, policy_version 178322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:02,672][626795] Updated weights for policy 0, policy_version 178332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:03,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44100.3, 300 sec: 42848.3). Total num frames: 1460953088. Throughput: 0: 10875.8. Samples: 115224990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:03,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:04,446][626795] Updated weights for policy 0, policy_version 178342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:06,203][626795] Updated weights for policy 0, policy_version 178352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:07,903][626795] Updated weights for policy 0, policy_version 178362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:08,976][24592] Fps is (10 sec: 46691.0, 60 sec: 44099.7, 300 sec: 42848.2). Total num frames: 1461182464. Throughput: 0: 10857.2. Samples: 115294230. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:08,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:09,800][626795] Updated weights for policy 0, policy_version 178372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:13,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42052.3, 300 sec: 42431.8). Total num frames: 1461288960. Throughput: 0: 10651.5. Samples: 115319496. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:13,979][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:14,316][626795] Updated weights for policy 0, policy_version 178382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:16,087][626795] Updated weights for policy 0, policy_version 178392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:17,878][626795] Updated weights for policy 0, policy_version 178402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:18,975][24592] Fps is (10 sec: 33589.4, 60 sec: 42052.3, 300 sec: 42431.8). Total num frames: 1461518336. Throughput: 0: 10142.5. Samples: 115365576. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:18,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:19,592][626795] Updated weights for policy 0, policy_version 178412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:21,422][626795] Updated weights for policy 0, policy_version 178422 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:23,185][626795] Updated weights for policy 0, policy_version 178432 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:23,975][24592] Fps is (10 sec: 45874.6, 60 sec: 41915.6, 300 sec: 42431.8). Total num frames: 1461747712. Throughput: 0: 10835.8. Samples: 115434450. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:23,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:24,911][626795] Updated weights for policy 0, policy_version 178442 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:26,698][626795] Updated weights for policy 0, policy_version 178452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:28,420][626795] Updated weights for policy 0, policy_version 178462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:28,975][24592] Fps is (10 sec: 46694.8, 60 sec: 42052.4, 300 sec: 42765.1). Total num frames: 1461985280. Throughput: 0: 10871.9. Samples: 115470096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:28,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:30,161][626795] Updated weights for policy 0, policy_version 178472 (0.0040)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:31,959][626795] Updated weights for policy 0, policy_version 178482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:33,668][626795] Updated weights for policy 0, policy_version 178492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:33,975][24592] Fps is (10 sec: 46695.2, 60 sec: 42052.7, 300 sec: 42765.1). Total num frames: 1462214656. Throughput: 0: 10899.1. Samples: 115540404. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:33,976][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:35,430][626795] Updated weights for policy 0, policy_version 178502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:37,340][626795] Updated weights for policy 0, policy_version 178512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:38,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44100.3, 300 sec: 42765.0). Total num frames: 1462444032. Throughput: 0: 10857.7. Samples: 115609224. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:38,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:39,025][626795] Updated weights for policy 0, policy_version 178522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:40,835][626795] Updated weights for policy 0, policy_version 178532 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:42,617][626795] Updated weights for policy 0, policy_version 178542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:43,975][24592] Fps is (10 sec: 45875.1, 60 sec: 44100.8, 300 sec: 42709.5). Total num frames: 1462673408. Throughput: 0: 10854.7. Samples: 115643472. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:43,978][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:44,409][626795] Updated weights for policy 0, policy_version 178552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:48,806][626795] Updated weights for policy 0, policy_version 178562 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:48,975][24592] Fps is (10 sec: 33587.0, 60 sec: 42052.3, 300 sec: 42292.9). Total num frames: 1462779904. Throughput: 0: 10380.1. Samples: 115692096. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:48,976][24592] Avg episode reward: [(0, '4.958')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:50,672][626795] Updated weights for policy 0, policy_version 178572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:52,497][626795] Updated weights for policy 0, policy_version 178582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:53,975][24592] Fps is (10 sec: 33587.0, 60 sec: 41915.7, 300 sec: 42292.9). Total num frames: 1463009280. Throughput: 0: 10137.6. Samples: 115750416. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:53,978][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:54,186][626795] Updated weights for policy 0, policy_version 178592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:56,007][626795] Updated weights for policy 0, policy_version 178602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:57,794][626795] Updated weights for policy 0, policy_version 178612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:58,975][24592] Fps is (10 sec: 46694.7, 60 sec: 42188.8, 300 sec: 42294.6). Total num frames: 1463246848. Throughput: 0: 10338.8. Samples: 115784742. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:20:58,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:20:59,542][626795] Updated weights for policy 0, policy_version 178622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:01,343][626795] Updated weights for policy 0, policy_version 178632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:03,078][626795] Updated weights for policy 0, policy_version 178642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:03,976][24592] Fps is (10 sec: 46692.4, 60 sec: 42051.9, 300 sec: 42772.4). Total num frames: 1463476224. Throughput: 0: 10866.2. Samples: 115854558. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:03,976][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000178647_1463476224.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:04,043][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000177401_1453268992.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:04,840][626795] Updated weights for policy 0, policy_version 178652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:06,655][626795] Updated weights for policy 0, policy_version 178662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:08,484][626795] Updated weights for policy 0, policy_version 178672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:08,975][24592] Fps is (10 sec: 45055.9, 60 sec: 41916.3, 300 sec: 42765.0). Total num frames: 1463697408. Throughput: 0: 10861.6. Samples: 115923222. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:08,976][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:10,297][626795] Updated weights for policy 0, policy_version 178682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:11,936][626795] Updated weights for policy 0, policy_version 178692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:13,810][626795] Updated weights for policy 0, policy_version 178702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:13,975][24592] Fps is (10 sec: 45877.1, 60 sec: 44100.2, 300 sec: 42792.8). Total num frames: 1463934976. Throughput: 0: 10835.2. Samples: 115957680. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:13,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:15,498][626795] Updated weights for policy 0, policy_version 178712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:17,338][626795] Updated weights for policy 0, policy_version 178722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:18,976][24592] Fps is (10 sec: 46693.4, 60 sec: 44100.2, 300 sec: 42792.8). Total num frames: 1464164352. Throughput: 0: 10809.9. Samples: 116026854. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:18,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:19,222][626795] Updated weights for policy 0, policy_version 178732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:23,632][626795] Updated weights for policy 0, policy_version 178742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:23,976][24592] Fps is (10 sec: 32767.5, 60 sec: 41915.7, 300 sec: 42348.4). Total num frames: 1464262656. Throughput: 0: 10117.4. Samples: 116064510. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:23,978][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:25,335][626795] Updated weights for policy 0, policy_version 178752 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:27,132][626795] Updated weights for policy 0, policy_version 178762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:28,947][626795] Updated weights for policy 0, policy_version 178772 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:28,975][24592] Fps is (10 sec: 33587.9, 60 sec: 41915.7, 300 sec: 42376.2). Total num frames: 1464500224. Throughput: 0: 10118.4. Samples: 116098800. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:28,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:30,743][626795] Updated weights for policy 0, policy_version 178782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:32,476][626795] Updated weights for policy 0, policy_version 178792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:33,975][24592] Fps is (10 sec: 46695.4, 60 sec: 41915.7, 300 sec: 42348.6). Total num frames: 1464729600. Throughput: 0: 10588.3. Samples: 116168568. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:33,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:34,294][626795] Updated weights for policy 0, policy_version 178802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:36,049][626795] Updated weights for policy 0, policy_version 178812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:37,775][626795] Updated weights for policy 0, policy_version 178822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:38,975][24592] Fps is (10 sec: 45874.9, 60 sec: 41915.7, 300 sec: 42737.2). Total num frames: 1464958976. Throughput: 0: 10852.4. Samples: 116238774. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:38,977][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:39,521][626795] Updated weights for policy 0, policy_version 178832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:41,277][626795] Updated weights for policy 0, policy_version 178842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:43,007][626795] Updated weights for policy 0, policy_version 178852 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:43,976][24592] Fps is (10 sec: 46693.5, 60 sec: 42052.1, 300 sec: 42792.8). Total num frames: 1465196544. Throughput: 0: 10851.2. Samples: 116273046. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:43,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:44,794][626795] Updated weights for policy 0, policy_version 178862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:46,633][626795] Updated weights for policy 0, policy_version 178872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:48,384][626795] Updated weights for policy 0, policy_version 178882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:48,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44100.3, 300 sec: 42792.8). Total num frames: 1465425920. Throughput: 0: 10834.5. Samples: 116342106. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:48,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:50,185][626795] Updated weights for policy 0, policy_version 178892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:51,940][626795] Updated weights for policy 0, policy_version 178902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:53,712][626795] Updated weights for policy 0, policy_version 178912 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:53,975][24592] Fps is (10 sec: 45875.9, 60 sec: 44100.3, 300 sec: 42792.8). Total num frames: 1465655296. Throughput: 0: 10852.1. Samples: 116411568. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:53,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:21:55,488][626795] Updated weights for policy 0, policy_version 178922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:58,976][24592] Fps is (10 sec: 33584.5, 60 sec: 41915.2, 300 sec: 42348.4). Total num frames: 1465761792. Throughput: 0: 10629.7. Samples: 116436024. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:21:58,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:00,019][626795] Updated weights for policy 0, policy_version 178932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:01,778][626795] Updated weights for policy 0, policy_version 178942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:03,537][626795] Updated weights for policy 0, policy_version 178952 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:03,975][24592] Fps is (10 sec: 32768.1, 60 sec: 41779.5, 300 sec: 42293.0). Total num frames: 1465982976. Throughput: 0: 10157.1. Samples: 116483922. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:03,977][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:05,320][626795] Updated weights for policy 0, policy_version 178962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:07,163][626795] Updated weights for policy 0, policy_version 178972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:08,845][626795] Updated weights for policy 0, policy_version 178982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:08,975][24592] Fps is (10 sec: 45878.6, 60 sec: 42052.2, 300 sec: 42320.7). Total num frames: 1466220544. Throughput: 0: 10860.8. Samples: 116553246. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:08,977][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:10,637][626795] Updated weights for policy 0, policy_version 178992 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:12,369][626795] Updated weights for policy 0, policy_version 179002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:13,976][24592] Fps is (10 sec: 47512.6, 60 sec: 42052.2, 300 sec: 42792.8). Total num frames: 1466458112. Throughput: 0: 10878.1. Samples: 116588316. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:13,977][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:14,024][626795] Updated weights for policy 0, policy_version 179012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:15,892][626795] Updated weights for policy 0, policy_version 179022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:15,910][626772] Signal inference workers to stop experience collection... (1650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:15,911][626772] Signal inference workers to resume experience collection... (1650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:15,917][626795] InferenceWorker_p0-w0: stopping experience collection (1650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:15,929][626795] InferenceWorker_p0-w0: resuming experience collection (1650 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:17,698][626795] Updated weights for policy 0, policy_version 179032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:18,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42052.4, 300 sec: 42792.8). Total num frames: 1466687488. Throughput: 0: 10883.3. Samples: 116658318. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:18,978][24592] Avg episode reward: [(0, '4.860')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:19,384][626795] Updated weights for policy 0, policy_version 179042 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:21,278][626795] Updated weights for policy 0, policy_version 179052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:22,960][626795] Updated weights for policy 0, policy_version 179062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:23,975][24592] Fps is (10 sec: 45876.0, 60 sec: 44236.9, 300 sec: 42792.8). Total num frames: 1466916864. Throughput: 0: 10856.9. Samples: 116727336. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:23,977][24592] Avg episode reward: [(0, '4.925')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:24,803][626795] Updated weights for policy 0, policy_version 179072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:26,549][626795] Updated weights for policy 0, policy_version 179082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:28,282][626795] Updated weights for policy 0, policy_version 179092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:28,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44100.3, 300 sec: 42765.0). Total num frames: 1467146240. Throughput: 0: 10860.6. Samples: 116761770. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:28,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:30,108][626795] Updated weights for policy 0, policy_version 179102 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:33,975][24592] Fps is (10 sec: 33586.8, 60 sec: 42052.2, 300 sec: 42348.5). Total num frames: 1467252736. Throughput: 0: 10367.4. Samples: 116808642. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:33,977][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:34,647][626795] Updated weights for policy 0, policy_version 179112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:36,390][626795] Updated weights for policy 0, policy_version 179122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:38,158][626795] Updated weights for policy 0, policy_version 179132 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:38,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 1467482112. Throughput: 0: 10144.5. Samples: 116868072. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:38,976][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:40,008][626795] Updated weights for policy 0, policy_version 179142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:41,786][626795] Updated weights for policy 0, policy_version 179152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:43,531][626795] Updated weights for policy 0, policy_version 179162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:43,976][24592] Fps is (10 sec: 45874.0, 60 sec: 41915.6, 300 sec: 42320.7). Total num frames: 1467711488. Throughput: 0: 10369.7. Samples: 116902656. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:43,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:45,332][626795] Updated weights for policy 0, policy_version 179172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:47,068][626795] Updated weights for policy 0, policy_version 179182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:48,805][626795] Updated weights for policy 0, policy_version 179192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:48,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42052.2, 300 sec: 42792.8). Total num frames: 1467949056. Throughput: 0: 10852.0. Samples: 116972262. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:48,976][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:50,601][626795] Updated weights for policy 0, policy_version 179202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:52,357][626795] Updated weights for policy 0, policy_version 179212 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:53,976][24592] Fps is (10 sec: 45875.7, 60 sec: 41915.5, 300 sec: 42737.4). Total num frames: 1468170240. Throughput: 0: 10850.3. Samples: 117041514. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:53,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:54,106][626795] Updated weights for policy 0, policy_version 179222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:55,936][626795] Updated weights for policy 0, policy_version 179232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:57,728][626795] Updated weights for policy 0, policy_version 179242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:58,975][24592] Fps is (10 sec: 45056.2, 60 sec: 43964.3, 300 sec: 42765.0). Total num frames: 1468399616. Throughput: 0: 10838.3. Samples: 117076038. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:22:58,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:22:59,502][626795] Updated weights for policy 0, policy_version 179252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:01,362][626795] Updated weights for policy 0, policy_version 179262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:03,008][626795] Updated weights for policy 0, policy_version 179272 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:03,976][24592] Fps is (10 sec: 46693.9, 60 sec: 44236.5, 300 sec: 42765.0). Total num frames: 1468637184. Throughput: 0: 10808.6. Samples: 117144708. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:03,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000179277_1468637184.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:04,026][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000178016_1458307072.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:04,901][626795] Updated weights for policy 0, policy_version 179282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:08,975][24592] Fps is (10 sec: 33587.3, 60 sec: 41915.8, 300 sec: 42293.0). Total num frames: 1468735488. Throughput: 0: 10106.0. Samples: 117182106. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:08,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:09,359][626795] Updated weights for policy 0, policy_version 179292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:11,151][626795] Updated weights for policy 0, policy_version 179302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:12,969][626795] Updated weights for policy 0, policy_version 179312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:13,975][24592] Fps is (10 sec: 33588.5, 60 sec: 41915.9, 300 sec: 42320.7). Total num frames: 1468973056. Throughput: 0: 10109.2. Samples: 117216684. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:13,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:14,794][626795] Updated weights for policy 0, policy_version 179322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:16,363][626795] Updated weights for policy 0, policy_version 179332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:18,299][626795] Updated weights for policy 0, policy_version 179342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 41779.2, 300 sec: 42292.9). Total num frames: 1469194240. Throughput: 0: 10592.6. Samples: 117285306. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:18,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:20,031][626795] Updated weights for policy 0, policy_version 179352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:21,857][626795] Updated weights for policy 0, policy_version 179362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:23,513][626795] Updated weights for policy 0, policy_version 179372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:23,975][24592] Fps is (10 sec: 45875.1, 60 sec: 41915.7, 300 sec: 42737.2). Total num frames: 1469431808. Throughput: 0: 10830.8. Samples: 117355458. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:23,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:25,389][626795] Updated weights for policy 0, policy_version 179382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:27,058][626795] Updated weights for policy 0, policy_version 179392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:28,866][626795] Updated weights for policy 0, policy_version 179402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:28,975][24592] Fps is (10 sec: 46694.6, 60 sec: 41915.8, 300 sec: 42765.0). Total num frames: 1469661184. Throughput: 0: 10841.7. Samples: 117390528. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:28,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:30,678][626795] Updated weights for policy 0, policy_version 179412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:32,428][626795] Updated weights for policy 0, policy_version 179422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:33,976][24592] Fps is (10 sec: 45872.3, 60 sec: 43963.4, 300 sec: 42764.9). Total num frames: 1469890560. Throughput: 0: 10824.4. Samples: 117459366. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:33,981][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:34,130][626795] Updated weights for policy 0, policy_version 179432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:35,939][626795] Updated weights for policy 0, policy_version 179442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:37,710][626795] Updated weights for policy 0, policy_version 179452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:38,976][24592] Fps is (10 sec: 46693.5, 60 sec: 44100.2, 300 sec: 42765.0). Total num frames: 1470128128. Throughput: 0: 10850.6. Samples: 117529788. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:38,978][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:39,399][626795] Updated weights for policy 0, policy_version 179462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:43,976][24592] Fps is (10 sec: 33588.0, 60 sec: 41915.7, 300 sec: 42320.6). Total num frames: 1470226432. Throughput: 0: 10601.9. Samples: 117553128. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:43,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:44,002][626795] Updated weights for policy 0, policy_version 179472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:45,735][626795] Updated weights for policy 0, policy_version 179482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:47,430][626795] Updated weights for policy 0, policy_version 179492 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:48,975][24592] Fps is (10 sec: 33587.6, 60 sec: 41915.8, 300 sec: 42348.5). Total num frames: 1470464000. Throughput: 0: 10166.6. Samples: 117602202. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:48,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:49,224][626795] Updated weights for policy 0, policy_version 179502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:51,050][626795] Updated weights for policy 0, policy_version 179512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:52,809][626795] Updated weights for policy 0, policy_version 179522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:53,975][24592] Fps is (10 sec: 46696.0, 60 sec: 42052.4, 300 sec: 42320.7). Total num frames: 1470693376. Throughput: 0: 10871.5. Samples: 117671322. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:53,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:54,601][626795] Updated weights for policy 0, policy_version 179532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:56,388][626795] Updated weights for policy 0, policy_version 179542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:58,142][626795] Updated weights for policy 0, policy_version 179552 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:58,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42052.3, 300 sec: 42765.0). Total num frames: 1470922752. Throughput: 0: 10884.5. Samples: 117706488. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:23:58,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:23:59,953][626795] Updated weights for policy 0, policy_version 179562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:01,708][626795] Updated weights for policy 0, policy_version 179572 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:03,435][626795] Updated weights for policy 0, policy_version 179582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:03,976][24592] Fps is (10 sec: 46692.8, 60 sec: 42052.3, 300 sec: 42792.7). Total num frames: 1471160320. Throughput: 0: 10882.4. Samples: 117775020. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:03,977][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:05,269][626795] Updated weights for policy 0, policy_version 179592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:06,965][626795] Updated weights for policy 0, policy_version 179602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:08,764][626795] Updated weights for policy 0, policy_version 179612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:08,975][24592] Fps is (10 sec: 46694.1, 60 sec: 44236.8, 300 sec: 42792.8). Total num frames: 1471389696. Throughput: 0: 10874.0. Samples: 117844788. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:08,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:10,574][626795] Updated weights for policy 0, policy_version 179622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:12,418][626795] Updated weights for policy 0, policy_version 179632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:13,975][24592] Fps is (10 sec: 45877.0, 60 sec: 44100.3, 300 sec: 42792.8). Total num frames: 1471619072. Throughput: 0: 10852.4. Samples: 117878886. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:13,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:14,181][626795] Updated weights for policy 0, policy_version 179642 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:18,774][626795] Updated weights for policy 0, policy_version 179652 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:18,976][24592] Fps is (10 sec: 32766.1, 60 sec: 42051.8, 300 sec: 42320.6). Total num frames: 1471717376. Throughput: 0: 10335.2. Samples: 117924450. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:18,977][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:20,521][626795] Updated weights for policy 0, policy_version 179662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:22,193][626795] Updated weights for policy 0, policy_version 179672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:23,957][626795] Updated weights for policy 0, policy_version 179682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:23,976][24592] Fps is (10 sec: 33586.9, 60 sec: 42052.2, 300 sec: 42348.5). Total num frames: 1471954944. Throughput: 0: 10124.7. Samples: 117985398. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:23,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:25,761][626795] Updated weights for policy 0, policy_version 179692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:27,552][626795] Updated weights for policy 0, policy_version 179702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:28,975][24592] Fps is (10 sec: 45877.8, 60 sec: 41915.6, 300 sec: 42320.8). Total num frames: 1472176128. Throughput: 0: 10376.1. Samples: 118020048. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:28,977][24592] Avg episode reward: [(0, '4.935')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:29,277][626795] Updated weights for policy 0, policy_version 179712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:31,074][626795] Updated weights for policy 0, policy_version 179722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:32,885][626795] Updated weights for policy 0, policy_version 179732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:33,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42052.7, 300 sec: 42765.0). Total num frames: 1472413696. Throughput: 0: 10836.9. Samples: 118089864. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:33,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:34,533][626795] Updated weights for policy 0, policy_version 179742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:36,238][626795] Updated weights for policy 0, policy_version 179752 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:37,993][626795] Updated weights for policy 0, policy_version 179762 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:38,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42052.3, 300 sec: 42792.9). Total num frames: 1472651264. Throughput: 0: 10844.9. Samples: 118159344. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:38,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:39,919][626795] Updated weights for policy 0, policy_version 179772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:41,651][626795] Updated weights for policy 0, policy_version 179782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:43,375][626795] Updated weights for policy 0, policy_version 179792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44237.1, 300 sec: 42792.8). Total num frames: 1472880640. Throughput: 0: 10842.8. Samples: 118194414. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:43,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:45,189][626795] Updated weights for policy 0, policy_version 179802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:46,844][626795] Updated weights for policy 0, policy_version 179812 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:48,622][626795] Updated weights for policy 0, policy_version 179822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:48,975][24592] Fps is (10 sec: 45875.4, 60 sec: 44100.3, 300 sec: 42765.0). Total num frames: 1473110016. Throughput: 0: 10881.6. Samples: 118264686. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:48,976][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:50,539][626795] Updated weights for policy 0, policy_version 179832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:53,975][24592] Fps is (10 sec: 32767.8, 60 sec: 41915.7, 300 sec: 42348.5). Total num frames: 1473208320. Throughput: 0: 10128.1. Samples: 118300554. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:53,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:55,109][626795] Updated weights for policy 0, policy_version 179842 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:56,935][626795] Updated weights for policy 0, policy_version 179852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:24:58,566][626795] Updated weights for policy 0, policy_version 179862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:58,975][24592] Fps is (10 sec: 32768.0, 60 sec: 41915.7, 300 sec: 42320.7). Total num frames: 1473437696. Throughput: 0: 10134.4. Samples: 118334934. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:24:58,976][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:00,476][626795] Updated weights for policy 0, policy_version 179872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:02,178][626795] Updated weights for policy 0, policy_version 179882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:03,975][24592] Fps is (10 sec: 45875.5, 60 sec: 41779.5, 300 sec: 42320.8). Total num frames: 1473667072. Throughput: 0: 10654.4. Samples: 118403892. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:03,976][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000179891_1473667072.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000178647_1463476224.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:04,112][626795] Updated weights for policy 0, policy_version 179892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:05,755][626795] Updated weights for policy 0, policy_version 179902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:07,540][626795] Updated weights for policy 0, policy_version 179912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:08,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41915.8, 300 sec: 42765.0). Total num frames: 1473904640. Throughput: 0: 10840.3. Samples: 118473210. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:08,976][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:09,340][626795] Updated weights for policy 0, policy_version 179922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:11,038][626795] Updated weights for policy 0, policy_version 179932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:12,852][626795] Updated weights for policy 0, policy_version 179942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:13,975][24592] Fps is (10 sec: 46694.0, 60 sec: 41915.7, 300 sec: 42765.0). Total num frames: 1474134016. Throughput: 0: 10853.6. Samples: 118508460. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:13,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:14,625][626795] Updated weights for policy 0, policy_version 179952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:16,365][626795] Updated weights for policy 0, policy_version 179962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:18,057][626795] Updated weights for policy 0, policy_version 179972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:18,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44237.3, 300 sec: 42792.8). Total num frames: 1474371584. Throughput: 0: 10847.9. Samples: 118578018. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:18,977][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:19,940][626795] Updated weights for policy 0, policy_version 179982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:21,657][626795] Updated weights for policy 0, policy_version 179992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:23,445][626795] Updated weights for policy 0, policy_version 180002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:23,975][24592] Fps is (10 sec: 46694.9, 60 sec: 44100.4, 300 sec: 42765.0). Total num frames: 1474600960. Throughput: 0: 10845.9. Samples: 118647408. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:23,976][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:25,214][626795] Updated weights for policy 0, policy_version 180012 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:28,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 1474699264. Throughput: 0: 10603.3. Samples: 118671564. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:28,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:29,871][626795] Updated weights for policy 0, policy_version 180022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:31,658][626795] Updated weights for policy 0, policy_version 180032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:33,510][626795] Updated weights for policy 0, policy_version 180042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:33,976][24592] Fps is (10 sec: 31948.2, 60 sec: 41779.1, 300 sec: 42292.9). Total num frames: 1474920448. Throughput: 0: 10067.6. Samples: 118717728. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:33,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:35,251][626795] Updated weights for policy 0, policy_version 180052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:37,028][626795] Updated weights for policy 0, policy_version 180062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:38,897][626795] Updated weights for policy 0, policy_version 180072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:38,976][24592] Fps is (10 sec: 45055.2, 60 sec: 41642.6, 300 sec: 42292.9). Total num frames: 1475149824. Throughput: 0: 10789.6. Samples: 118786086. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:38,978][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:40,541][626795] Updated weights for policy 0, policy_version 180082 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:42,316][626795] Updated weights for policy 0, policy_version 180092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:43,975][24592] Fps is (10 sec: 46695.3, 60 sec: 41779.2, 300 sec: 42737.3). Total num frames: 1475387392. Throughput: 0: 10804.1. Samples: 118821120. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:43,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:44,148][626795] Updated weights for policy 0, policy_version 180102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:45,770][626795] Updated weights for policy 0, policy_version 180112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:47,470][626795] Updated weights for policy 0, policy_version 180122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:48,976][24592] Fps is (10 sec: 47511.5, 60 sec: 41915.3, 300 sec: 42764.9). Total num frames: 1475624960. Throughput: 0: 10848.3. Samples: 118892070. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:48,978][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:49,299][626795] Updated weights for policy 0, policy_version 180132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:51,055][626795] Updated weights for policy 0, policy_version 180142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:52,864][626795] Updated weights for policy 0, policy_version 180152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:53,976][24592] Fps is (10 sec: 46692.7, 60 sec: 44100.0, 300 sec: 42737.2). Total num frames: 1475854336. Throughput: 0: 10854.0. Samples: 118961646. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:53,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:54,617][626795] Updated weights for policy 0, policy_version 180162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:56,375][626795] Updated weights for policy 0, policy_version 180172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:58,119][626795] Updated weights for policy 0, policy_version 180182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:58,975][24592] Fps is (10 sec: 45878.0, 60 sec: 44100.2, 300 sec: 42737.3). Total num frames: 1476083712. Throughput: 0: 10836.4. Samples: 118996098. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:25:58,978][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:25:59,920][626795] Updated weights for policy 0, policy_version 180192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:03,976][24592] Fps is (10 sec: 31948.4, 60 sec: 41778.9, 300 sec: 42292.9). Total num frames: 1476173824. Throughput: 0: 10368.4. Samples: 119044602. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:03,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:04,734][626795] Updated weights for policy 0, policy_version 180202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:06,376][626795] Updated weights for policy 0, policy_version 180212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:08,335][626795] Updated weights for policy 0, policy_version 180222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:08,975][24592] Fps is (10 sec: 31948.8, 60 sec: 41642.6, 300 sec: 42265.2). Total num frames: 1476403200. Throughput: 0: 10037.2. Samples: 119099082. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:08,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:10,230][626795] Updated weights for policy 0, policy_version 180232 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:11,983][626795] Updated weights for policy 0, policy_version 180242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:13,777][626795] Updated weights for policy 0, policy_version 180252 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:13,975][24592] Fps is (10 sec: 45877.4, 60 sec: 41642.7, 300 sec: 42265.2). Total num frames: 1476632576. Throughput: 0: 10241.9. Samples: 119132448. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:13,977][24592] Avg episode reward: [(0, '4.899')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:15,493][626795] Updated weights for policy 0, policy_version 180262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:17,197][626795] Updated weights for policy 0, policy_version 180272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:18,976][24592] Fps is (10 sec: 45870.1, 60 sec: 41505.3, 300 sec: 42709.3). Total num frames: 1476861952. Throughput: 0: 10776.4. Samples: 119202678. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:18,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:19,006][626795] Updated weights for policy 0, policy_version 180282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:20,671][626795] Updated weights for policy 0, policy_version 180292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:22,383][626795] Updated weights for policy 0, policy_version 180302 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:23,975][24592] Fps is (10 sec: 47513.4, 60 sec: 41779.2, 300 sec: 42737.2). Total num frames: 1477107712. Throughput: 0: 10844.4. Samples: 119274084. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:23,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:24,133][626795] Updated weights for policy 0, policy_version 180312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:25,865][626795] Updated weights for policy 0, policy_version 180322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:27,613][626795] Updated weights for policy 0, policy_version 180332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:28,975][24592] Fps is (10 sec: 48337.9, 60 sec: 44100.2, 300 sec: 42765.0). Total num frames: 1477345280. Throughput: 0: 10845.9. Samples: 119309184. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:28,978][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:29,404][626795] Updated weights for policy 0, policy_version 180342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:31,048][626795] Updated weights for policy 0, policy_version 180352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:32,846][626795] Updated weights for policy 0, policy_version 180362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:33,976][24592] Fps is (10 sec: 46693.2, 60 sec: 44236.7, 300 sec: 42765.0). Total num frames: 1477574656. Throughput: 0: 10834.5. Samples: 119379618. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:33,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:34,658][626795] Updated weights for policy 0, policy_version 180372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:38,976][24592] Fps is (10 sec: 32765.8, 60 sec: 42051.9, 300 sec: 42292.9). Total num frames: 1477672960. Throughput: 0: 10155.5. Samples: 119418648. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:38,977][24592] Avg episode reward: [(0, '4.899')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:39,373][626795] Updated weights for policy 0, policy_version 180382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:41,125][626795] Updated weights for policy 0, policy_version 180392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:43,040][626795] Updated weights for policy 0, policy_version 180402 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:43,975][24592] Fps is (10 sec: 31949.4, 60 sec: 41779.1, 300 sec: 42265.1). Total num frames: 1477894144. Throughput: 0: 10069.7. Samples: 119449236. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:43,976][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:44,779][626795] Updated weights for policy 0, policy_version 180412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:46,640][626795] Updated weights for policy 0, policy_version 180422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:48,402][626795] Updated weights for policy 0, policy_version 180432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:48,975][24592] Fps is (10 sec: 45059.2, 60 sec: 41643.1, 300 sec: 42265.2). Total num frames: 1478123520. Throughput: 0: 10485.3. Samples: 119516436. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:48,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:50,087][626795] Updated weights for policy 0, policy_version 180442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:51,787][626795] Updated weights for policy 0, policy_version 180452 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:53,637][626795] Updated weights for policy 0, policy_version 180462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:53,975][24592] Fps is (10 sec: 46694.8, 60 sec: 41779.4, 300 sec: 42709.6). Total num frames: 1478361088. Throughput: 0: 10853.2. Samples: 119587476. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:53,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:55,321][626795] Updated weights for policy 0, policy_version 180472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:56,927][626795] Updated weights for policy 0, policy_version 180482 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:26:58,750][626795] Updated weights for policy 0, policy_version 180492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:58,975][24592] Fps is (10 sec: 47514.2, 60 sec: 41915.8, 300 sec: 42765.0). Total num frames: 1478598656. Throughput: 0: 10907.2. Samples: 119623272. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:26:58,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:00,553][626795] Updated weights for policy 0, policy_version 180502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:02,180][626795] Updated weights for policy 0, policy_version 180512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:03,926][626795] Updated weights for policy 0, policy_version 180522 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:03,975][24592] Fps is (10 sec: 47513.1, 60 sec: 44373.6, 300 sec: 42765.0). Total num frames: 1478836224. Throughput: 0: 10924.5. Samples: 119694270. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:03,977][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000180522_1478836224.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000179277_1468637184.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:05,750][626795] Updated weights for policy 0, policy_version 180532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:07,440][626795] Updated weights for policy 0, policy_version 180542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:08,976][24592] Fps is (10 sec: 46692.5, 60 sec: 44373.1, 300 sec: 42737.2). Total num frames: 1479065600. Throughput: 0: 10902.6. Samples: 119764704. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:08,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:09,220][626795] Updated weights for policy 0, policy_version 180552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:11,034][626795] Updated weights for policy 0, policy_version 180562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:14,072][24592] Fps is (10 sec: 33264.9, 60 sec: 42257.0, 300 sec: 42306.8). Total num frames: 1479172096. Throughput: 0: 10100.9. Samples: 119764704. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:14,073][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:15,682][626795] Updated weights for policy 0, policy_version 180572 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:17,552][626795] Updated weights for policy 0, policy_version 180582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:18,975][24592] Fps is (10 sec: 31949.6, 60 sec: 42053.0, 300 sec: 42265.2). Total num frames: 1479385088. Throughput: 0: 10084.5. Samples: 119833416. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:18,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:19,576][626795] Updated weights for policy 0, policy_version 180592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:21,328][626795] Updated weights for policy 0, policy_version 180602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:23,185][626795] Updated weights for policy 0, policy_version 180612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:23,975][24592] Fps is (10 sec: 43843.0, 60 sec: 41642.7, 300 sec: 42237.4). Total num frames: 1479606272. Throughput: 0: 10699.1. Samples: 119900100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:23,976][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:24,925][626795] Updated weights for policy 0, policy_version 180622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:26,600][626795] Updated weights for policy 0, policy_version 180632 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:28,371][626795] Updated weights for policy 0, policy_version 180642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:28,976][24592] Fps is (10 sec: 45874.0, 60 sec: 41642.5, 300 sec: 42681.7). Total num frames: 1479843840. Throughput: 0: 10808.6. Samples: 119935626. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:28,977][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:30,060][626795] Updated weights for policy 0, policy_version 180652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:31,763][626795] Updated weights for policy 0, policy_version 180662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:33,555][626795] Updated weights for policy 0, policy_version 180672 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:33,975][24592] Fps is (10 sec: 47513.7, 60 sec: 41779.4, 300 sec: 42709.5). Total num frames: 1480081408. Throughput: 0: 10886.3. Samples: 120006318. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:33,976][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:35,255][626795] Updated weights for policy 0, policy_version 180682 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:36,939][626795] Updated weights for policy 0, policy_version 180692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:38,750][626795] Updated weights for policy 0, policy_version 180702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:38,975][24592] Fps is (10 sec: 47514.4, 60 sec: 44100.7, 300 sec: 42737.3). Total num frames: 1480318976. Throughput: 0: 10888.4. Samples: 120077454. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:38,977][24592] Avg episode reward: [(0, '5.051')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:40,402][626795] Updated weights for policy 0, policy_version 180712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:42,111][626795] Updated weights for policy 0, policy_version 180722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:43,916][626795] Updated weights for policy 0, policy_version 180732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44373.4, 300 sec: 42737.3). Total num frames: 1480556544. Throughput: 0: 10897.2. Samples: 120113646. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:43,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:45,517][626795] Updated weights for policy 0, policy_version 180742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:49,267][24592] Fps is (10 sec: 34228.7, 60 sec: 42256.7, 300 sec: 42334.5). Total num frames: 1480671232. Throughput: 0: 10043.3. Samples: 120149142. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:49,269][24592] Avg episode reward: [(0, '4.891')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:50,334][626795] Updated weights for policy 0, policy_version 180752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:52,256][626795] Updated weights for policy 0, policy_version 180762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:53,943][626795] Updated weights for policy 0, policy_version 180772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:53,976][24592] Fps is (10 sec: 32766.9, 60 sec: 42052.0, 300 sec: 42320.7). Total num frames: 1480884224. Throughput: 0: 10077.0. Samples: 120218172. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:53,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:55,953][626795] Updated weights for policy 0, policy_version 180782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:57,354][626772] Signal inference workers to stop experience collection... (1700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:57,362][626772] Signal inference workers to resume experience collection... (1700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:57,369][626795] InferenceWorker_p0-w0: stopping experience collection (1700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:57,373][626795] InferenceWorker_p0-w0: resuming experience collection (1700 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:57,909][626795] Updated weights for policy 0, policy_version 180792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:58,975][24592] Fps is (10 sec: 43877.0, 60 sec: 41642.6, 300 sec: 42237.5). Total num frames: 1481097216. Throughput: 0: 10805.0. Samples: 120249882. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:27:58,978][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:27:59,448][626795] Updated weights for policy 0, policy_version 180802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:01,323][626795] Updated weights for policy 0, policy_version 180812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:02,965][626795] Updated weights for policy 0, policy_version 180822 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:03,975][24592] Fps is (10 sec: 45876.9, 60 sec: 41779.3, 300 sec: 42737.2). Total num frames: 1481342976. Throughput: 0: 10820.5. Samples: 120320340. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:03,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:04,592][626795] Updated weights for policy 0, policy_version 180832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:06,356][626795] Updated weights for policy 0, policy_version 180842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:08,146][626795] Updated weights for policy 0, policy_version 180852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:08,975][24592] Fps is (10 sec: 48332.3, 60 sec: 41915.9, 300 sec: 42737.2). Total num frames: 1481580544. Throughput: 0: 10931.6. Samples: 120392022. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:08,976][24592] Avg episode reward: [(0, '5.030')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:09,824][626795] Updated weights for policy 0, policy_version 180862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:11,589][626795] Updated weights for policy 0, policy_version 180872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:13,301][626795] Updated weights for policy 0, policy_version 180882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:13,976][24592] Fps is (10 sec: 46690.3, 60 sec: 44034.3, 300 sec: 42764.9). Total num frames: 1481809920. Throughput: 0: 10936.8. Samples: 120427788. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:13,979][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:14,988][626795] Updated weights for policy 0, policy_version 180892 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:16,771][626795] Updated weights for policy 0, policy_version 180902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:18,521][626795] Updated weights for policy 0, policy_version 180912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:18,975][24592] Fps is (10 sec: 46694.9, 60 sec: 44373.4, 300 sec: 42765.0). Total num frames: 1482047488. Throughput: 0: 10935.6. Samples: 120498420. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:18,976][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:20,263][626795] Updated weights for policy 0, policy_version 180922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:24,460][24592] Fps is (10 sec: 34380.1, 60 sec: 42392.2, 300 sec: 42334.4). Total num frames: 1482170368. Throughput: 0: 10045.2. Samples: 120534360. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:24,462][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:24,815][626795] Updated weights for policy 0, policy_version 180932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:26,755][626795] Updated weights for policy 0, policy_version 180942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:28,647][626795] Updated weights for policy 0, policy_version 180952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:28,976][24592] Fps is (10 sec: 32766.1, 60 sec: 42188.6, 300 sec: 42320.7). Total num frames: 1482375168. Throughput: 0: 10122.4. Samples: 120569160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:28,978][24592] Avg episode reward: [(0, '4.316')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:30,447][626795] Updated weights for policy 0, policy_version 180962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:32,252][626795] Updated weights for policy 0, policy_version 180972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:33,975][24592] Fps is (10 sec: 44770.0, 60 sec: 41915.7, 300 sec: 42265.2). Total num frames: 1482596352. Throughput: 0: 10871.9. Samples: 120635208. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:33,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:34,007][626795] Updated weights for policy 0, policy_version 180982 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:35,820][626795] Updated weights for policy 0, policy_version 180992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:37,465][626795] Updated weights for policy 0, policy_version 181002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:38,975][24592] Fps is (10 sec: 45877.8, 60 sec: 41915.8, 300 sec: 42737.3). Total num frames: 1482833920. Throughput: 0: 10855.7. Samples: 120706674. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:38,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:39,267][626795] Updated weights for policy 0, policy_version 181012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:40,880][626795] Updated weights for policy 0, policy_version 181022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:42,669][626795] Updated weights for policy 0, policy_version 181032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:43,975][24592] Fps is (10 sec: 47513.9, 60 sec: 41915.8, 300 sec: 42737.2). Total num frames: 1483071488. Throughput: 0: 10929.7. Samples: 120741720. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:43,976][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:44,475][626795] Updated weights for policy 0, policy_version 181042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:46,245][626795] Updated weights for policy 0, policy_version 181052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:47,827][626795] Updated weights for policy 0, policy_version 181062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:48,976][24592] Fps is (10 sec: 47512.5, 60 sec: 44178.1, 300 sec: 42765.0). Total num frames: 1483309056. Throughput: 0: 10953.4. Samples: 120813246. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:48,978][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:49,509][626795] Updated weights for policy 0, policy_version 181072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:51,297][626795] Updated weights for policy 0, policy_version 181082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:53,087][626795] Updated weights for policy 0, policy_version 181092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:53,976][24592] Fps is (10 sec: 47512.0, 60 sec: 44373.4, 300 sec: 42792.7). Total num frames: 1483546624. Throughput: 0: 10938.2. Samples: 120884244. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:53,977][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:54,809][626795] Updated weights for policy 0, policy_version 181102 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:28:56,580][626795] Updated weights for policy 0, policy_version 181112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:59,676][24592] Fps is (10 sec: 33685.3, 60 sec: 42376.6, 300 sec: 42303.6). Total num frames: 1483669504. Throughput: 0: 10758.0. Samples: 120919428. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:28:59,677][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:01,266][626795] Updated weights for policy 0, policy_version 181122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:03,146][626795] Updated weights for policy 0, policy_version 181132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:03,975][24592] Fps is (10 sec: 31950.0, 60 sec: 42052.3, 300 sec: 42292.9). Total num frames: 1483866112. Throughput: 0: 10110.7. Samples: 120953400. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:03,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000181136_1483866112.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:04,047][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000179891_1473667072.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:05,036][626795] Updated weights for policy 0, policy_version 181142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:06,962][626795] Updated weights for policy 0, policy_version 181152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:08,758][626795] Updated weights for policy 0, policy_version 181162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:08,976][24592] Fps is (10 sec: 44926.8, 60 sec: 41779.2, 300 sec: 42265.2). Total num frames: 1484087296. Throughput: 0: 10892.3. Samples: 121019232. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:08,989][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:10,634][626795] Updated weights for policy 0, policy_version 181172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:12,446][626795] Updated weights for policy 0, policy_version 181182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:13,976][24592] Fps is (10 sec: 44235.7, 60 sec: 41643.1, 300 sec: 42681.8). Total num frames: 1484308480. Throughput: 0: 10734.7. Samples: 121052220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:13,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:14,247][626795] Updated weights for policy 0, policy_version 181192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:15,902][626795] Updated weights for policy 0, policy_version 181202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:17,723][626795] Updated weights for policy 0, policy_version 181212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 41642.6, 300 sec: 42681.7). Total num frames: 1484546048. Throughput: 0: 10815.3. Samples: 121121898. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:18,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:19,449][626795] Updated weights for policy 0, policy_version 181222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:21,193][626795] Updated weights for policy 0, policy_version 181232 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:22,921][626795] Updated weights for policy 0, policy_version 181242 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:23,975][24592] Fps is (10 sec: 47514.6, 60 sec: 43909.1, 300 sec: 42737.3). Total num frames: 1484783616. Throughput: 0: 10811.9. Samples: 121193208. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:23,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:24,717][626795] Updated weights for policy 0, policy_version 181252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:26,287][626795] Updated weights for policy 0, policy_version 181262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:28,132][626795] Updated weights for policy 0, policy_version 181272 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 43964.1, 300 sec: 42709.5). Total num frames: 1485012992. Throughput: 0: 10817.3. Samples: 121228500. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:28,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:29,775][626795] Updated weights for policy 0, policy_version 181282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:31,580][626795] Updated weights for policy 0, policy_version 181292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:34,468][24592] Fps is (10 sec: 35912.7, 60 sec: 42386.7, 300 sec: 42333.3). Total num frames: 1485160448. Throughput: 0: 9922.1. Samples: 121264632. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:34,469][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:35,858][626795] Updated weights for policy 0, policy_version 181302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:37,725][626795] Updated weights for policy 0, policy_version 181312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:38,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42188.8, 300 sec: 42320.7). Total num frames: 1485365248. Throughput: 0: 10108.1. Samples: 121339104. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:38,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:39,459][626795] Updated weights for policy 0, policy_version 181322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:41,237][626795] Updated weights for policy 0, policy_version 181332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:42,919][626795] Updated weights for policy 0, policy_version 181342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:43,975][24592] Fps is (10 sec: 45669.0, 60 sec: 42052.2, 300 sec: 42320.7). Total num frames: 1485594624. Throughput: 0: 10243.9. Samples: 121373226. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:43,978][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:44,752][626795] Updated weights for policy 0, policy_version 181352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:46,374][626795] Updated weights for policy 0, policy_version 181362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:48,135][626795] Updated weights for policy 0, policy_version 181372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:48,975][24592] Fps is (10 sec: 46694.5, 60 sec: 42052.4, 300 sec: 42792.8). Total num frames: 1485832192. Throughput: 0: 10916.7. Samples: 121444650. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:48,976][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:49,876][626795] Updated weights for policy 0, policy_version 181382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:51,576][626795] Updated weights for policy 0, policy_version 181392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:53,348][626795] Updated weights for policy 0, policy_version 181402 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:53,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42052.5, 300 sec: 42820.6). Total num frames: 1486069760. Throughput: 0: 11029.6. Samples: 121515564. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:53,976][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:55,097][626795] Updated weights for policy 0, policy_version 181412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:56,855][626795] Updated weights for policy 0, policy_version 181422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:29:58,568][626795] Updated weights for policy 0, policy_version 181432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:58,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44483.2, 300 sec: 42848.3). Total num frames: 1486307328. Throughput: 0: 11080.2. Samples: 121550826. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:29:58,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:00,425][626795] Updated weights for policy 0, policy_version 181442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:02,154][626795] Updated weights for policy 0, policy_version 181452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:03,876][626795] Updated weights for policy 0, policy_version 181462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:03,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44509.8, 300 sec: 42820.6). Total num frames: 1486536704. Throughput: 0: 11077.1. Samples: 121620366. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:03,977][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:05,654][626795] Updated weights for policy 0, policy_version 181472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:09,842][24592] Fps is (10 sec: 33171.3, 60 sec: 42395.9, 300 sec: 42362.9). Total num frames: 1486667776. Throughput: 0: 10076.6. Samples: 121655382. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:09,842][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:10,650][626795] Updated weights for policy 0, policy_version 181482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:12,585][626795] Updated weights for policy 0, policy_version 181492 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:13,975][24592] Fps is (10 sec: 30310.0, 60 sec: 42188.9, 300 sec: 42265.1). Total num frames: 1486839808. Throughput: 0: 10179.4. Samples: 121686576. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:13,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:14,528][626795] Updated weights for policy 0, policy_version 181502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:16,483][626795] Updated weights for policy 0, policy_version 181512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:18,215][626795] Updated weights for policy 0, policy_version 181522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:18,976][24592] Fps is (10 sec: 43049.0, 60 sec: 41915.5, 300 sec: 42237.3). Total num frames: 1487060992. Throughput: 0: 10917.6. Samples: 121750548. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:18,977][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:20,089][626795] Updated weights for policy 0, policy_version 181532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:21,835][626795] Updated weights for policy 0, policy_version 181542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:23,613][626795] Updated weights for policy 0, policy_version 181552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:23,976][24592] Fps is (10 sec: 44236.6, 60 sec: 41642.6, 300 sec: 42653.9). Total num frames: 1487282176. Throughput: 0: 10667.6. Samples: 121819146. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:23,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:25,406][626795] Updated weights for policy 0, policy_version 181562 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:27,166][626795] Updated weights for policy 0, policy_version 181572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:28,907][626795] Updated weights for policy 0, policy_version 181582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:28,975][24592] Fps is (10 sec: 45877.2, 60 sec: 41779.2, 300 sec: 42709.5). Total num frames: 1487519744. Throughput: 0: 10681.6. Samples: 121853898. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:28,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:30,621][626795] Updated weights for policy 0, policy_version 181592 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:32,351][626795] Updated weights for policy 0, policy_version 181602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:33,975][24592] Fps is (10 sec: 47514.5, 60 sec: 43639.7, 300 sec: 42737.3). Total num frames: 1487757312. Throughput: 0: 10680.5. Samples: 121925274. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:33,976][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:34,062][626795] Updated weights for policy 0, policy_version 181612 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:35,822][626795] Updated weights for policy 0, policy_version 181622 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:37,550][626795] Updated weights for policy 0, policy_version 181632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:38,975][24592] Fps is (10 sec: 47513.0, 60 sec: 43827.1, 300 sec: 42737.2). Total num frames: 1487994880. Throughput: 0: 10681.8. Samples: 121996248. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:38,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:39,243][626795] Updated weights for policy 0, policy_version 181642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:40,975][626795] Updated weights for policy 0, policy_version 181652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:44,612][24592] Fps is (10 sec: 35426.7, 60 sec: 41880.8, 300 sec: 42312.7). Total num frames: 1488134144. Throughput: 0: 10533.8. Samples: 122031558. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:44,613][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:45,367][626795] Updated weights for policy 0, policy_version 181662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:47,109][626795] Updated weights for policy 0, policy_version 181672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:48,944][626795] Updated weights for policy 0, policy_version 181682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:48,975][24592] Fps is (10 sec: 34406.9, 60 sec: 41779.2, 300 sec: 42320.8). Total num frames: 1488338944. Throughput: 0: 10011.7. Samples: 122070894. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:48,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:50,699][626795] Updated weights for policy 0, policy_version 181692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:52,485][626795] Updated weights for policy 0, policy_version 181702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:53,981][24592] Fps is (10 sec: 46341.4, 60 sec: 41638.5, 300 sec: 42319.8). Total num frames: 1488568320. Throughput: 0: 10988.2. Samples: 122140398. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:53,983][24592] Avg episode reward: [(0, '5.073')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:54,192][626795] Updated weights for policy 0, policy_version 181712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:55,986][626795] Updated weights for policy 0, policy_version 181722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:57,646][626795] Updated weights for policy 0, policy_version 181732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:58,976][24592] Fps is (10 sec: 46689.8, 60 sec: 41642.0, 300 sec: 42820.5). Total num frames: 1488805888. Throughput: 0: 10868.3. Samples: 122175660. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:30:58,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:30:59,404][626795] Updated weights for policy 0, policy_version 181742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:01,140][626795] Updated weights for policy 0, policy_version 181752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:02,858][626795] Updated weights for policy 0, policy_version 181762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:03,975][24592] Fps is (10 sec: 47542.1, 60 sec: 41779.2, 300 sec: 42848.3). Total num frames: 1489043456. Throughput: 0: 11025.6. Samples: 122246694. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:03,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000181768_1489043456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000180522_1478836224.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:04,617][626795] Updated weights for policy 0, policy_version 181772 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:06,345][626795] Updated weights for policy 0, policy_version 181782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:08,071][626795] Updated weights for policy 0, policy_version 181792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:08,976][24592] Fps is (10 sec: 47516.1, 60 sec: 44191.9, 300 sec: 42876.0). Total num frames: 1489281024. Throughput: 0: 11061.5. Samples: 122316918. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:08,977][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:09,943][626795] Updated weights for policy 0, policy_version 181802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:11,663][626795] Updated weights for policy 0, policy_version 181812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:13,502][626795] Updated weights for policy 0, policy_version 181822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:13,976][24592] Fps is (10 sec: 46689.7, 60 sec: 44509.2, 300 sec: 42876.1). Total num frames: 1489510400. Throughput: 0: 11065.0. Samples: 122351832. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:13,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:15,202][626795] Updated weights for policy 0, policy_version 181832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:19,946][24592] Fps is (10 sec: 32855.6, 60 sec: 42323.3, 300 sec: 42347.9). Total num frames: 1489641472. Throughput: 0: 10032.4. Samples: 122386476. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:19,947][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:20,164][626795] Updated weights for policy 0, policy_version 181842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:21,945][626795] Updated weights for policy 0, policy_version 181852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:23,891][626795] Updated weights for policy 0, policy_version 181862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:23,975][24592] Fps is (10 sec: 30313.5, 60 sec: 42188.9, 300 sec: 42265.2). Total num frames: 1489813504. Throughput: 0: 10110.3. Samples: 122451210. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:23,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:25,755][626795] Updated weights for policy 0, policy_version 181872 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:27,381][626795] Updated weights for policy 0, policy_version 181882 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:28,975][24592] Fps is (10 sec: 45365.7, 60 sec: 42188.8, 300 sec: 42293.0). Total num frames: 1490051072. Throughput: 0: 10235.8. Samples: 122485650. Policy #0 lag: (min: 1.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:28,976][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:29,145][626795] Updated weights for policy 0, policy_version 181892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:30,908][626795] Updated weights for policy 0, policy_version 181902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:32,630][626795] Updated weights for policy 0, policy_version 181912 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:33,976][24592] Fps is (10 sec: 47510.5, 60 sec: 42188.3, 300 sec: 42765.0). Total num frames: 1490288640. Throughput: 0: 10811.7. Samples: 122557428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:33,986][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:34,321][626795] Updated weights for policy 0, policy_version 181922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:36,054][626795] Updated weights for policy 0, policy_version 181932 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:37,701][626795] Updated weights for policy 0, policy_version 181942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:38,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42052.3, 300 sec: 42792.8). Total num frames: 1490518016. Throughput: 0: 10838.5. Samples: 122628066. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:38,976][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:39,459][626795] Updated weights for policy 0, policy_version 181952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:41,207][626795] Updated weights for policy 0, policy_version 181962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:42,933][626795] Updated weights for policy 0, policy_version 181972 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:43,975][24592] Fps is (10 sec: 46697.7, 60 sec: 44159.5, 300 sec: 42820.6). Total num frames: 1490755584. Throughput: 0: 10854.5. Samples: 122664102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:43,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:44,705][626795] Updated weights for policy 0, policy_version 181982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:46,502][626795] Updated weights for policy 0, policy_version 181992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:48,102][626795] Updated weights for policy 0, policy_version 182002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:48,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44236.8, 300 sec: 42820.6). Total num frames: 1490993152. Throughput: 0: 10837.6. Samples: 122734386. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:48,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:49,971][626795] Updated weights for policy 0, policy_version 182012 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:51,708][626795] Updated weights for policy 0, policy_version 182022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:55,047][24592] Fps is (10 sec: 34776.7, 60 sec: 42123.6, 300 sec: 42361.2). Total num frames: 1491140608. Throughput: 0: 9830.2. Samples: 122769804. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:55,049][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:56,414][626795] Updated weights for policy 0, policy_version 182032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:31:58,344][626795] Updated weights for policy 0, policy_version 182042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:58,975][24592] Fps is (10 sec: 31948.9, 60 sec: 41779.9, 300 sec: 42293.0). Total num frames: 1491312640. Throughput: 0: 10058.9. Samples: 122804472. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:31:58,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:00,039][626795] Updated weights for policy 0, policy_version 182052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:01,791][626795] Updated weights for policy 0, policy_version 182062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:03,581][626795] Updated weights for policy 0, policy_version 182072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:03,975][24592] Fps is (10 sec: 44956.5, 60 sec: 41642.6, 300 sec: 42293.0). Total num frames: 1491542016. Throughput: 0: 11052.1. Samples: 122873088. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:03,976][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:05,348][626795] Updated weights for policy 0, policy_version 182082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:07,037][626795] Updated weights for policy 0, policy_version 182092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:08,836][626795] Updated weights for policy 0, policy_version 182102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:08,976][24592] Fps is (10 sec: 47509.3, 60 sec: 41778.9, 300 sec: 42779.0). Total num frames: 1491787776. Throughput: 0: 10960.6. Samples: 122944446. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:08,978][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:10,546][626795] Updated weights for policy 0, policy_version 182112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:12,116][626795] Updated weights for policy 0, policy_version 182122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:13,918][626795] Updated weights for policy 0, policy_version 182132 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:13,975][24592] Fps is (10 sec: 48332.6, 60 sec: 41916.4, 300 sec: 42848.3). Total num frames: 1492025344. Throughput: 0: 10984.6. Samples: 122979960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:13,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:15,629][626795] Updated weights for policy 0, policy_version 182142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:17,351][626795] Updated weights for policy 0, policy_version 182152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:18,976][24592] Fps is (10 sec: 47515.2, 60 sec: 44409.0, 300 sec: 42903.8). Total num frames: 1492262912. Throughput: 0: 10983.6. Samples: 123051690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:18,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:19,144][626795] Updated weights for policy 0, policy_version 182162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:20,815][626795] Updated weights for policy 0, policy_version 182172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:22,577][626795] Updated weights for policy 0, policy_version 182182 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:23,975][24592] Fps is (10 sec: 46695.1, 60 sec: 44646.4, 300 sec: 42876.1). Total num frames: 1492492288. Throughput: 0: 10951.9. Samples: 123120900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:23,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:24,375][626795] Updated weights for policy 0, policy_version 182192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:26,249][626795] Updated weights for policy 0, policy_version 182202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:30,193][24592] Fps is (10 sec: 33593.2, 60 sec: 42286.1, 300 sec: 42395.6). Total num frames: 1492639744. Throughput: 0: 10626.0. Samples: 123155214. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:30,194][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:30,955][626795] Updated weights for policy 0, policy_version 182212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:32,866][626795] Updated weights for policy 0, policy_version 182222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:33,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42052.7, 300 sec: 42348.5). Total num frames: 1492811776. Throughput: 0: 10105.7. Samples: 123189144. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:33,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:34,588][626795] Updated weights for policy 0, policy_version 182232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:36,414][626795] Updated weights for policy 0, policy_version 182242 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:38,064][626795] Updated weights for policy 0, policy_version 182252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:38,975][24592] Fps is (10 sec: 46641.3, 60 sec: 42188.8, 300 sec: 42348.5). Total num frames: 1493049344. Throughput: 0: 11143.4. Samples: 123259320. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:38,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:39,735][626795] Updated weights for policy 0, policy_version 182262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:41,591][626795] Updated weights for policy 0, policy_version 182272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:43,263][626795] Updated weights for policy 0, policy_version 182282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42188.8, 300 sec: 42807.3). Total num frames: 1493286912. Throughput: 0: 10910.7. Samples: 123295452. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:43,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:44,873][626795] Updated weights for policy 0, policy_version 182292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:46,661][626795] Updated weights for policy 0, policy_version 182302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:48,395][626795] Updated weights for policy 0, policy_version 182312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:48,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42188.8, 300 sec: 42848.4). Total num frames: 1493524480. Throughput: 0: 10964.3. Samples: 123366480. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:48,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:50,233][626795] Updated weights for policy 0, policy_version 182322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:51,867][626795] Updated weights for policy 0, policy_version 182332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:53,550][626795] Updated weights for policy 0, policy_version 182342 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:53,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44345.9, 300 sec: 42903.9). Total num frames: 1493753856. Throughput: 0: 10958.0. Samples: 123437544. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:53,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:55,321][626795] Updated weights for policy 0, policy_version 182352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:57,154][626795] Updated weights for policy 0, policy_version 182362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:32:58,916][626795] Updated weights for policy 0, policy_version 182372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:58,976][24592] Fps is (10 sec: 46691.1, 60 sec: 44645.9, 300 sec: 42876.0). Total num frames: 1493991424. Throughput: 0: 10944.7. Samples: 123472476. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:32:58,978][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:00,700][626795] Updated weights for policy 0, policy_version 182382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:02,413][626795] Updated weights for policy 0, policy_version 182392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:05,386][24592] Fps is (10 sec: 35178.8, 60 sec: 42553.8, 300 sec: 42423.3). Total num frames: 1494155264. Throughput: 0: 9826.3. Samples: 123507726. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:05,387][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:05,391][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000182392_1494155264.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:05,453][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000181136_1483866112.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:07,381][626795] Updated weights for policy 0, policy_version 182402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:08,975][24592] Fps is (10 sec: 31950.9, 60 sec: 42052.9, 300 sec: 42376.4). Total num frames: 1494310912. Throughput: 0: 10085.7. Samples: 123574758. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:08,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:09,200][626795] Updated weights for policy 0, policy_version 182412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:10,879][626795] Updated weights for policy 0, policy_version 182422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:12,681][626795] Updated weights for policy 0, policy_version 182432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:13,976][24592] Fps is (10 sec: 44823.4, 60 sec: 41915.6, 300 sec: 42348.4). Total num frames: 1494540288. Throughput: 0: 10380.7. Samples: 123609702. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:13,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:14,267][626795] Updated weights for policy 0, policy_version 182442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:16,122][626795] Updated weights for policy 0, policy_version 182452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:17,761][626795] Updated weights for policy 0, policy_version 182462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:18,975][24592] Fps is (10 sec: 46694.3, 60 sec: 41916.1, 300 sec: 42807.6). Total num frames: 1494777856. Throughput: 0: 10932.4. Samples: 123681102. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:18,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:19,543][626795] Updated weights for policy 0, policy_version 182472 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:21,173][626795] Updated weights for policy 0, policy_version 182482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:23,025][626795] Updated weights for policy 0, policy_version 182492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:23,975][24592] Fps is (10 sec: 47515.0, 60 sec: 42052.2, 300 sec: 42848.4). Total num frames: 1495015424. Throughput: 0: 10929.9. Samples: 123751164. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:23,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:24,772][626795] Updated weights for policy 0, policy_version 182502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:26,559][626795] Updated weights for policy 0, policy_version 182512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:28,212][626795] Updated weights for policy 0, policy_version 182522 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:28,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44456.6, 300 sec: 42903.9). Total num frames: 1495252992. Throughput: 0: 10928.3. Samples: 123787224. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:28,985][24592] Avg episode reward: [(0, '5.128')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:30,008][626795] Updated weights for policy 0, policy_version 182532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:31,751][626795] Updated weights for policy 0, policy_version 182542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:33,441][626795] Updated weights for policy 0, policy_version 182552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:33,975][24592] Fps is (10 sec: 46694.1, 60 sec: 44509.8, 300 sec: 42876.1). Total num frames: 1495482368. Throughput: 0: 10909.4. Samples: 123857406. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:33,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:35,216][626795] Updated weights for policy 0, policy_version 182562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:37,163][626795] Updated weights for policy 0, policy_version 182572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:40,588][24592] Fps is (10 sec: 33860.1, 60 sec: 42148.0, 300 sec: 42394.4). Total num frames: 1495646208. Throughput: 0: 9770.2. Samples: 123892962. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:40,590][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:42,053][626795] Updated weights for policy 0, policy_version 182582 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:43,765][626795] Updated weights for policy 0, policy_version 182592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:43,975][24592] Fps is (10 sec: 31129.7, 60 sec: 41779.2, 300 sec: 42320.7). Total num frames: 1495793664. Throughput: 0: 10063.5. Samples: 123925326. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:43,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:45,582][626795] Updated weights for policy 0, policy_version 182602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:47,328][626795] Updated weights for policy 0, policy_version 182612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:48,975][24592] Fps is (10 sec: 45907.0, 60 sec: 41779.1, 300 sec: 42320.7). Total num frames: 1496031232. Throughput: 0: 11153.4. Samples: 123993900. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:48,978][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:49,147][626795] Updated weights for policy 0, policy_version 182622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:50,845][626795] Updated weights for policy 0, policy_version 182632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:52,580][626795] Updated weights for policy 0, policy_version 182642 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:53,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41779.2, 300 sec: 42783.3). Total num frames: 1496260608. Throughput: 0: 10887.2. Samples: 124064682. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:53,976][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:54,266][626795] Updated weights for policy 0, policy_version 182652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:56,018][626795] Updated weights for policy 0, policy_version 182662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:57,783][626795] Updated weights for policy 0, policy_version 182672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,384][626772] Signal inference workers to stop experience collection... (1750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,384][626772] Signal inference workers to resume experience collection... (1750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,397][626795] InferenceWorker_p0-w0: stopping experience collection (1750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,400][626795] InferenceWorker_p0-w0: resuming experience collection (1750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,976][24592] Fps is (10 sec: 46694.0, 60 sec: 41779.5, 300 sec: 42820.5). Total num frames: 1496498176. Throughput: 0: 10894.4. Samples: 124099950. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:33:58,978][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:33:59,492][626795] Updated weights for policy 0, policy_version 182682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:01,204][626795] Updated weights for policy 0, policy_version 182692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:02,975][626795] Updated weights for policy 0, policy_version 182702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:03,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44043.4, 300 sec: 42876.1). Total num frames: 1496735744. Throughput: 0: 10873.3. Samples: 124170402. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:03,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:04,700][626795] Updated weights for policy 0, policy_version 182712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:06,361][626795] Updated weights for policy 0, policy_version 182722 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:08,146][626795] Updated weights for policy 0, policy_version 182732 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:08,975][24592] Fps is (10 sec: 47514.6, 60 sec: 44373.4, 300 sec: 42931.7). Total num frames: 1496973312. Throughput: 0: 10903.7. Samples: 124241832. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:08,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:09,803][626795] Updated weights for policy 0, policy_version 182742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:11,642][626795] Updated weights for policy 0, policy_version 182752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:15,798][24592] Fps is (10 sec: 35337.9, 60 sec: 42270.2, 300 sec: 42474.8). Total num frames: 1497153536. Throughput: 0: 10463.8. Samples: 124277166. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:15,799][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:16,577][626795] Updated weights for policy 0, policy_version 182762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:18,538][626795] Updated weights for policy 0, policy_version 182772 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:18,975][24592] Fps is (10 sec: 31129.3, 60 sec: 41779.2, 300 sec: 42376.2). Total num frames: 1497284608. Throughput: 0: 10043.7. Samples: 124309374. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:18,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:20,286][626795] Updated weights for policy 0, policy_version 182782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:22,037][626795] Updated weights for policy 0, policy_version 182792 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:23,820][626795] Updated weights for policy 0, policy_version 182802 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:23,975][24592] Fps is (10 sec: 45080.9, 60 sec: 41779.2, 300 sec: 42404.0). Total num frames: 1497522176. Throughput: 0: 11173.4. Samples: 124377744. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:23,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:25,552][626795] Updated weights for policy 0, policy_version 182812 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:27,198][626795] Updated weights for policy 0, policy_version 182822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:28,975][24592] Fps is (10 sec: 46694.6, 60 sec: 41642.7, 300 sec: 42753.2). Total num frames: 1497751552. Throughput: 0: 10837.1. Samples: 124412994. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:28,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:29,079][626795] Updated weights for policy 0, policy_version 182832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:30,659][626795] Updated weights for policy 0, policy_version 182842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:32,476][626795] Updated weights for policy 0, policy_version 182852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:33,975][24592] Fps is (10 sec: 46694.1, 60 sec: 41779.2, 300 sec: 42792.8). Total num frames: 1497989120. Throughput: 0: 10900.5. Samples: 124484424. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:33,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:34,107][626795] Updated weights for policy 0, policy_version 182862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:35,897][626795] Updated weights for policy 0, policy_version 182872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:37,580][626795] Updated weights for policy 0, policy_version 182882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:38,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44336.5, 300 sec: 42848.3). Total num frames: 1498234880. Throughput: 0: 10912.3. Samples: 124555734. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:38,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:39,355][626795] Updated weights for policy 0, policy_version 182892 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:40,950][626795] Updated weights for policy 0, policy_version 182902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:42,724][626795] Updated weights for policy 0, policy_version 182912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:43,976][24592] Fps is (10 sec: 48332.1, 60 sec: 44646.2, 300 sec: 42848.3). Total num frames: 1498472448. Throughput: 0: 10923.2. Samples: 124591494. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:43,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:44,524][626795] Updated weights for policy 0, policy_version 182922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:46,175][626795] Updated weights for policy 0, policy_version 182932 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:50,907][24592] Fps is (10 sec: 35016.2, 60 sec: 42328.2, 300 sec: 42376.5). Total num frames: 1498652672. Throughput: 0: 10453.0. Samples: 124660974. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:50,908][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:50,953][626795] Updated weights for policy 0, policy_version 182942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:52,977][626795] Updated weights for policy 0, policy_version 182952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:53,975][24592] Fps is (10 sec: 31130.3, 60 sec: 42052.2, 300 sec: 42292.9). Total num frames: 1498783744. Throughput: 0: 10040.4. Samples: 124693650. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:53,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:54,943][626795] Updated weights for policy 0, policy_version 182962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:56,674][626795] Updated weights for policy 0, policy_version 182972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:34:58,415][626795] Updated weights for policy 0, policy_version 182982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:58,976][24592] Fps is (10 sec: 44671.8, 60 sec: 41915.7, 300 sec: 42292.9). Total num frames: 1499013120. Throughput: 0: 10411.3. Samples: 124726698. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:34:58,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:00,096][626795] Updated weights for policy 0, policy_version 182992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:01,887][626795] Updated weights for policy 0, policy_version 183002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:03,580][626795] Updated weights for policy 0, policy_version 183012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:03,976][24592] Fps is (10 sec: 46693.2, 60 sec: 41915.5, 300 sec: 42779.5). Total num frames: 1499250688. Throughput: 0: 10859.1. Samples: 124798038. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:03,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000183014_1499250688.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:04,032][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000181768_1489043456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:05,330][626795] Updated weights for policy 0, policy_version 183022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:07,018][626795] Updated weights for policy 0, policy_version 183032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:08,673][626795] Updated weights for policy 0, policy_version 183042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:08,975][24592] Fps is (10 sec: 47514.6, 60 sec: 41915.7, 300 sec: 42876.1). Total num frames: 1499488256. Throughput: 0: 10932.8. Samples: 124869720. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:08,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:10,475][626795] Updated weights for policy 0, policy_version 183052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:12,236][626795] Updated weights for policy 0, policy_version 183062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:13,846][626795] Updated weights for policy 0, policy_version 183072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:13,975][24592] Fps is (10 sec: 47514.7, 60 sec: 44214.6, 300 sec: 42931.7). Total num frames: 1499725824. Throughput: 0: 10934.1. Samples: 124905030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:13,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:15,591][626795] Updated weights for policy 0, policy_version 183082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:17,398][626795] Updated weights for policy 0, policy_version 183092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:18,975][24592] Fps is (10 sec: 47513.1, 60 sec: 44646.4, 300 sec: 42987.2). Total num frames: 1499963392. Throughput: 0: 10940.3. Samples: 124976736. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:18,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:19,037][626795] Updated weights for policy 0, policy_version 183102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:20,787][626795] Updated weights for policy 0, policy_version 183112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:22,743][626795] Updated weights for policy 0, policy_version 183122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:26,034][24592] Fps is (10 sec: 35325.2, 60 sec: 42373.1, 300 sec: 42523.8). Total num frames: 1500151808. Throughput: 0: 9707.6. Samples: 125012562. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:26,035][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:27,461][626795] Updated weights for policy 0, policy_version 183132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:28,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42052.2, 300 sec: 42431.8). Total num frames: 1500274688. Throughput: 0: 10097.0. Samples: 125045856. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:28,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:29,434][626795] Updated weights for policy 0, policy_version 183142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:31,342][626795] Updated weights for policy 0, policy_version 183152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:33,033][626795] Updated weights for policy 0, policy_version 183162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:33,975][24592] Fps is (10 sec: 43327.3, 60 sec: 41779.3, 300 sec: 42376.3). Total num frames: 1500495872. Throughput: 0: 10462.7. Samples: 125111586. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:33,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:34,776][626795] Updated weights for policy 0, policy_version 183172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:36,554][626795] Updated weights for policy 0, policy_version 183182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:38,315][626795] Updated weights for policy 0, policy_version 183192 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:38,975][24592] Fps is (10 sec: 45875.6, 60 sec: 41642.7, 300 sec: 42801.9). Total num frames: 1500733440. Throughput: 0: 10839.9. Samples: 125181444. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:38,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:40,167][626795] Updated weights for policy 0, policy_version 183202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:41,914][626795] Updated weights for policy 0, policy_version 183212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:43,586][626795] Updated weights for policy 0, policy_version 183222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:43,975][24592] Fps is (10 sec: 47513.4, 60 sec: 41642.8, 300 sec: 42820.5). Total num frames: 1500971008. Throughput: 0: 10882.8. Samples: 125216424. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:43,976][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:45,399][626795] Updated weights for policy 0, policy_version 183232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:47,166][626795] Updated weights for policy 0, policy_version 183242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:48,967][626795] Updated weights for policy 0, policy_version 183252 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:48,975][24592] Fps is (10 sec: 46694.5, 60 sec: 43874.2, 300 sec: 42821.4). Total num frames: 1501200384. Throughput: 0: 10835.9. Samples: 125285652. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:48,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:50,740][626795] Updated weights for policy 0, policy_version 183262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:52,608][626795] Updated weights for policy 0, policy_version 183272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:53,975][24592] Fps is (10 sec: 45055.6, 60 sec: 43963.7, 300 sec: 42765.1). Total num frames: 1501421568. Throughput: 0: 10770.9. Samples: 125354412. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:35:53,976][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:54,345][626795] Updated weights for policy 0, policy_version 183282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:56,029][626795] Updated weights for policy 0, policy_version 183292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:35:57,839][626795] Updated weights for policy 0, policy_version 183302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:00,463][24592] Fps is (10 sec: 37082.2, 60 sec: 42500.6, 300 sec: 42439.9). Total num frames: 1501626368. Throughput: 0: 10410.3. Samples: 125388978. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:00,464][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:01,845][626795] Updated weights for policy 0, policy_version 183312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:03,588][626795] Updated weights for policy 0, policy_version 183322 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:03,975][24592] Fps is (10 sec: 36864.5, 60 sec: 42325.5, 300 sec: 42404.1). Total num frames: 1501790208. Throughput: 0: 10146.3. Samples: 125433318. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:03,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:05,412][626795] Updated weights for policy 0, policy_version 183332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:07,155][626795] Updated weights for policy 0, policy_version 183342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:08,877][626795] Updated weights for policy 0, policy_version 183352 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:08,975][24592] Fps is (10 sec: 47155.5, 60 sec: 42325.4, 300 sec: 42431.9). Total num frames: 1502027776. Throughput: 0: 11431.0. Samples: 125503422. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:08,977][24592] Avg episode reward: [(0, '4.902')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:10,553][626795] Updated weights for policy 0, policy_version 183362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:12,282][626795] Updated weights for policy 0, policy_version 183372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:13,946][626795] Updated weights for policy 0, policy_version 183382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:13,976][24592] Fps is (10 sec: 47512.1, 60 sec: 42325.1, 300 sec: 42934.1). Total num frames: 1502265344. Throughput: 0: 10956.9. Samples: 125538918. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:13,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:15,695][626795] Updated weights for policy 0, policy_version 183392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:17,414][626795] Updated weights for policy 0, policy_version 183402 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:18,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42325.4, 300 sec: 43014.9). Total num frames: 1502502912. Throughput: 0: 11103.2. Samples: 125611230. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:18,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:19,050][626795] Updated weights for policy 0, policy_version 183412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:20,815][626795] Updated weights for policy 0, policy_version 183422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:22,538][626795] Updated weights for policy 0, policy_version 183432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:23,975][24592] Fps is (10 sec: 47514.9, 60 sec: 44677.7, 300 sec: 43014.9). Total num frames: 1502740480. Throughput: 0: 11161.3. Samples: 125683704. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:23,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:24,114][626795] Updated weights for policy 0, policy_version 183442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:25,921][626795] Updated weights for policy 0, policy_version 183452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:27,620][626795] Updated weights for policy 0, policy_version 183462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:28,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45192.6, 300 sec: 43042.8). Total num frames: 1502986240. Throughput: 0: 11166.8. Samples: 125718930. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:28,976][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:29,403][626795] Updated weights for policy 0, policy_version 183472 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:31,014][626795] Updated weights for policy 0, policy_version 183482 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:36,064][24592] Fps is (10 sec: 34561.4, 60 sec: 42880.8, 300 sec: 42547.1). Total num frames: 1503158272. Throughput: 0: 10728.7. Samples: 125790852. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:36,066][24592] Avg episode reward: [(0, '4.934')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:36,158][626795] Updated weights for policy 0, policy_version 183492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:37,823][626795] Updated weights for policy 0, policy_version 183502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:38,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42734.9, 300 sec: 42515.1). Total num frames: 1503297536. Throughput: 0: 10395.1. Samples: 125822190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:38,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:39,602][626795] Updated weights for policy 0, policy_version 183512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:41,331][626795] Updated weights for policy 0, policy_version 183522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:43,016][626795] Updated weights for policy 0, policy_version 183532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:43,975][24592] Fps is (10 sec: 47630.8, 60 sec: 42735.0, 300 sec: 42515.1). Total num frames: 1503535104. Throughput: 0: 10768.6. Samples: 125857548. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:43,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:44,686][626795] Updated weights for policy 0, policy_version 183542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:46,477][626795] Updated weights for policy 0, policy_version 183552 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:48,207][626795] Updated weights for policy 0, policy_version 183562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:48,976][24592] Fps is (10 sec: 47511.7, 60 sec: 42871.2, 300 sec: 42976.6). Total num frames: 1503772672. Throughput: 0: 11011.5. Samples: 125928840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:48,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:49,915][626795] Updated weights for policy 0, policy_version 183572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:51,629][626795] Updated weights for policy 0, policy_version 183582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:53,422][626795] Updated weights for policy 0, policy_version 183592 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:53,975][24592] Fps is (10 sec: 47513.7, 60 sec: 43144.7, 300 sec: 43042.7). Total num frames: 1504010240. Throughput: 0: 11025.3. Samples: 125999562. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:53,976][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:55,129][626795] Updated weights for policy 0, policy_version 183602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:56,914][626795] Updated weights for policy 0, policy_version 183612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:36:58,735][626795] Updated weights for policy 0, policy_version 183622 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:58,975][24592] Fps is (10 sec: 46696.3, 60 sec: 44661.4, 300 sec: 43042.7). Total num frames: 1504239616. Throughput: 0: 11021.8. Samples: 126034896. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:36:58,976][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:00,324][626795] Updated weights for policy 0, policy_version 183632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:02,169][626795] Updated weights for policy 0, policy_version 183642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:03,892][626795] Updated weights for policy 0, policy_version 183652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:03,976][24592] Fps is (10 sec: 46692.1, 60 sec: 44782.6, 300 sec: 43015.0). Total num frames: 1504477184. Throughput: 0: 10997.0. Samples: 126106098. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:03,977][24592] Avg episode reward: [(0, '5.026')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000183652_1504477184.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:04,040][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000182392_1494155264.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:05,589][626795] Updated weights for policy 0, policy_version 183662 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:07,286][626795] Updated weights for policy 0, policy_version 183672 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:11,682][24592] Fps is (10 sec: 34813.8, 60 sec: 42327.3, 300 sec: 42513.8). Total num frames: 1504681984. Throughput: 0: 9589.0. Samples: 126141162. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:11,685][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:12,450][626795] Updated weights for policy 0, policy_version 183682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:13,975][24592] Fps is (10 sec: 31130.9, 60 sec: 42052.5, 300 sec: 42459.6). Total num frames: 1504788480. Throughput: 0: 10097.6. Samples: 126173322. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:13,977][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:14,120][626795] Updated weights for policy 0, policy_version 183692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:15,940][626795] Updated weights for policy 0, policy_version 183702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:17,787][626795] Updated weights for policy 0, policy_version 183712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:18,976][24592] Fps is (10 sec: 47172.8, 60 sec: 42052.0, 300 sec: 42487.3). Total num frames: 1505026048. Throughput: 0: 10519.3. Samples: 126242256. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:18,978][24592] Avg episode reward: [(0, '4.970')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:19,379][626795] Updated weights for policy 0, policy_version 183722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:21,189][626795] Updated weights for policy 0, policy_version 183732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:22,880][626795] Updated weights for policy 0, policy_version 183742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:23,975][24592] Fps is (10 sec: 46694.5, 60 sec: 41915.8, 300 sec: 42942.3). Total num frames: 1505255424. Throughput: 0: 10899.3. Samples: 126312660. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:23,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:24,685][626795] Updated weights for policy 0, policy_version 183752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:26,457][626795] Updated weights for policy 0, policy_version 183762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:28,122][626795] Updated weights for policy 0, policy_version 183772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:28,975][24592] Fps is (10 sec: 47515.4, 60 sec: 41915.8, 300 sec: 43014.9). Total num frames: 1505501184. Throughput: 0: 10905.7. Samples: 126348306. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:28,976][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:29,882][626795] Updated weights for policy 0, policy_version 183782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:31,563][626795] Updated weights for policy 0, policy_version 183792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:33,319][626795] Updated weights for policy 0, policy_version 183802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:33,975][24592] Fps is (10 sec: 48333.0, 60 sec: 44559.0, 300 sec: 43014.9). Total num frames: 1505738752. Throughput: 0: 10896.2. Samples: 126419166. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:33,976][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:35,165][626795] Updated weights for policy 0, policy_version 183812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:36,848][626795] Updated weights for policy 0, policy_version 183822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:38,603][626795] Updated weights for policy 0, policy_version 183832 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:38,976][24592] Fps is (10 sec: 46693.4, 60 sec: 44509.7, 300 sec: 42987.1). Total num frames: 1505968128. Throughput: 0: 10902.9. Samples: 126490194. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:38,978][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:40,250][626795] Updated weights for policy 0, policy_version 183842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:42,056][626795] Updated weights for policy 0, policy_version 183852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:43,849][626795] Updated weights for policy 0, policy_version 183862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:47,131][24592] Fps is (10 sec: 34870.2, 60 sec: 42155.9, 300 sec: 42504.7). Total num frames: 1506197504. Throughput: 0: 10169.0. Samples: 126524592. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:47,133][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:48,691][626795] Updated weights for policy 0, policy_version 183872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:48,975][24592] Fps is (10 sec: 31949.7, 60 sec: 41916.1, 300 sec: 42487.3). Total num frames: 1506287616. Throughput: 0: 10035.3. Samples: 126557682. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:48,977][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:50,424][626795] Updated weights for policy 0, policy_version 183882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:52,174][626795] Updated weights for policy 0, policy_version 183892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:53,929][626795] Updated weights for policy 0, policy_version 183902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:53,975][24592] Fps is (10 sec: 47878.3, 60 sec: 41915.7, 300 sec: 42487.4). Total num frames: 1506525184. Throughput: 0: 11530.3. Samples: 126628818. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:53,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:55,713][626795] Updated weights for policy 0, policy_version 183912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:57,412][626795] Updated weights for policy 0, policy_version 183922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:58,975][24592] Fps is (10 sec: 47513.1, 60 sec: 42052.2, 300 sec: 42942.6). Total num frames: 1506762752. Throughput: 0: 10898.7. Samples: 126663762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:37:58,976][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:37:59,125][626795] Updated weights for policy 0, policy_version 183932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:00,852][626795] Updated weights for policy 0, policy_version 183942 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:02,640][626795] Updated weights for policy 0, policy_version 183952 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:03,975][24592] Fps is (10 sec: 46694.3, 60 sec: 41916.0, 300 sec: 42987.2). Total num frames: 1506992128. Throughput: 0: 10939.2. Samples: 126734514. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:03,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:04,331][626795] Updated weights for policy 0, policy_version 183962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:06,061][626795] Updated weights for policy 0, policy_version 183972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:07,800][626795] Updated weights for policy 0, policy_version 183982 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:08,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44467.9, 300 sec: 43015.0). Total num frames: 1507229696. Throughput: 0: 10948.9. Samples: 126805362. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:08,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:09,558][626795] Updated weights for policy 0, policy_version 183992 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:11,390][626795] Updated weights for policy 0, policy_version 184002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:12,983][626795] Updated weights for policy 0, policy_version 184012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:13,976][24592] Fps is (10 sec: 47511.1, 60 sec: 44646.0, 300 sec: 43014.9). Total num frames: 1507467264. Throughput: 0: 10942.4. Samples: 126840720. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:13,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:14,714][626795] Updated weights for policy 0, policy_version 184022 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:16,549][626795] Updated weights for policy 0, policy_version 184032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:18,224][626795] Updated weights for policy 0, policy_version 184042 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:18,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44646.7, 300 sec: 43014.9). Total num frames: 1507704832. Throughput: 0: 10944.9. Samples: 126911688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:18,976][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:23,140][626795] Updated weights for policy 0, policy_version 184052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:23,975][24592] Fps is (10 sec: 31950.6, 60 sec: 42188.8, 300 sec: 42487.3). Total num frames: 1507786752. Throughput: 0: 10094.6. Samples: 126944448. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:23,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:24,901][626795] Updated weights for policy 0, policy_version 184062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:26,599][626795] Updated weights for policy 0, policy_version 184072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:28,391][626795] Updated weights for policy 0, policy_version 184082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:28,976][24592] Fps is (10 sec: 31947.9, 60 sec: 42052.1, 300 sec: 42515.1). Total num frames: 1508024320. Throughput: 0: 10891.7. Samples: 126980346. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:28,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:30,114][626795] Updated weights for policy 0, policy_version 184092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:31,823][626795] Updated weights for policy 0, policy_version 184102 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:33,615][626795] Updated weights for policy 0, policy_version 184112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:33,976][24592] Fps is (10 sec: 47512.1, 60 sec: 42052.0, 300 sec: 43000.1). Total num frames: 1508261888. Throughput: 0: 10952.4. Samples: 127050546. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:33,977][24592] Avg episode reward: [(0, '4.996')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:35,413][626795] Updated weights for policy 0, policy_version 184122 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:37,089][626795] Updated weights for policy 0, policy_version 184132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:38,781][626795] Updated weights for policy 0, policy_version 184142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:38,976][24592] Fps is (10 sec: 46693.8, 60 sec: 42052.1, 300 sec: 43042.7). Total num frames: 1508491264. Throughput: 0: 10960.0. Samples: 127122024. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:38,976][24592] Avg episode reward: [(0, '5.020')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:40,566][626795] Updated weights for policy 0, policy_version 184152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:42,278][626795] Updated weights for policy 0, policy_version 184162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:43,975][24592] Fps is (10 sec: 46695.7, 60 sec: 44531.1, 300 sec: 43042.7). Total num frames: 1508728832. Throughput: 0: 10961.1. Samples: 127157010. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:43,978][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:44,029][626795] Updated weights for policy 0, policy_version 184172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:45,721][626795] Updated weights for policy 0, policy_version 184182 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:47,498][626795] Updated weights for policy 0, policy_version 184192 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:48,975][24592] Fps is (10 sec: 47515.2, 60 sec: 44646.3, 300 sec: 43070.5). Total num frames: 1508966400. Throughput: 0: 10958.7. Samples: 127227654. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:48,976][24592] Avg episode reward: [(0, '4.247')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:49,190][626795] Updated weights for policy 0, policy_version 184202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:50,989][626795] Updated weights for policy 0, policy_version 184212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:52,715][626795] Updated weights for policy 0, policy_version 184222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:53,976][24592] Fps is (10 sec: 47511.1, 60 sec: 44646.0, 300 sec: 43070.4). Total num frames: 1509203968. Throughput: 0: 10963.9. Samples: 127298742. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:53,980][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:54,474][626795] Updated weights for policy 0, policy_version 184232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:58,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42052.3, 300 sec: 42542.9). Total num frames: 1509285888. Throughput: 0: 10478.8. Samples: 127312260. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:38:58,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:38:59,418][626795] Updated weights for policy 0, policy_version 184242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:01,227][626795] Updated weights for policy 0, policy_version 184252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:02,884][626795] Updated weights for policy 0, policy_version 184262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:03,976][24592] Fps is (10 sec: 31948.8, 60 sec: 42188.5, 300 sec: 42542.8). Total num frames: 1509523456. Throughput: 0: 10098.7. Samples: 127366134. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:03,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000184268_1509523456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:04,048][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000183014_1499250688.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:04,672][626795] Updated weights for policy 0, policy_version 184272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:06,548][626795] Updated weights for policy 0, policy_version 184282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:08,186][626795] Updated weights for policy 0, policy_version 184292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:08,975][24592] Fps is (10 sec: 46694.9, 60 sec: 42052.3, 300 sec: 42975.0). Total num frames: 1509752832. Throughput: 0: 10924.9. Samples: 127436070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:08,976][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:09,885][626795] Updated weights for policy 0, policy_version 184302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:11,673][626795] Updated weights for policy 0, policy_version 184312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:13,392][626795] Updated weights for policy 0, policy_version 184322 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:13,975][24592] Fps is (10 sec: 46696.8, 60 sec: 42052.7, 300 sec: 43070.5). Total num frames: 1509990400. Throughput: 0: 10915.7. Samples: 127471548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:13,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:15,062][626795] Updated weights for policy 0, policy_version 184332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:16,848][626795] Updated weights for policy 0, policy_version 184342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:18,656][626795] Updated weights for policy 0, policy_version 184352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:18,976][24592] Fps is (10 sec: 47512.2, 60 sec: 42052.1, 300 sec: 43070.5). Total num frames: 1510227968. Throughput: 0: 10923.5. Samples: 127542102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:18,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:20,315][626795] Updated weights for policy 0, policy_version 184362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:22,038][626795] Updated weights for policy 0, policy_version 184372 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:23,752][626795] Updated weights for policy 0, policy_version 184382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:23,976][24592] Fps is (10 sec: 46693.0, 60 sec: 44509.6, 300 sec: 43070.4). Total num frames: 1510457344. Throughput: 0: 10921.0. Samples: 127613466. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:23,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:25,534][626795] Updated weights for policy 0, policy_version 184392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:27,369][626795] Updated weights for policy 0, policy_version 184402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:28,975][24592] Fps is (10 sec: 46695.2, 60 sec: 44510.0, 300 sec: 43070.5). Total num frames: 1510694912. Throughput: 0: 10920.9. Samples: 127648452. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:28,976][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:28,990][626795] Updated weights for policy 0, policy_version 184412 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:33,913][626795] Updated weights for policy 0, policy_version 184422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:33,975][24592] Fps is (10 sec: 32769.0, 60 sec: 42052.5, 300 sec: 42542.9). Total num frames: 1510785024. Throughput: 0: 10328.7. Samples: 127692444. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:33,978][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:35,704][626795] Updated weights for policy 0, policy_version 184432 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:37,424][626795] Updated weights for policy 0, policy_version 184442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:38,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42052.5, 300 sec: 42515.1). Total num frames: 1511014400. Throughput: 0: 10070.7. Samples: 127751916. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:38,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:39,171][626795] Updated weights for policy 0, policy_version 184452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:40,949][626795] Updated weights for policy 0, policy_version 184462 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:42,647][626795] Updated weights for policy 0, policy_version 184472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:43,976][24592] Fps is (10 sec: 46693.9, 60 sec: 42052.2, 300 sec: 42990.9). Total num frames: 1511251968. Throughput: 0: 10555.0. Samples: 127787238. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:43,979][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:44,451][626795] Updated weights for policy 0, policy_version 184482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:46,107][626795] Updated weights for policy 0, policy_version 184492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:47,846][626772] Signal inference workers to stop experience collection... (1800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:47,848][626772] Signal inference workers to resume experience collection... (1800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:47,856][626795] InferenceWorker_p0-w0: stopping experience collection (1800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:47,860][626795] InferenceWorker_p0-w0: resuming experience collection (1800 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:47,885][626795] Updated weights for policy 0, policy_version 184502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:48,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42052.2, 300 sec: 43070.5). Total num frames: 1511489536. Throughput: 0: 10923.6. Samples: 127857690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:48,976][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:49,720][626795] Updated weights for policy 0, policy_version 184512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:51,349][626795] Updated weights for policy 0, policy_version 184522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:53,009][626795] Updated weights for policy 0, policy_version 184532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:53,976][24592] Fps is (10 sec: 47511.7, 60 sec: 42052.3, 300 sec: 43098.2). Total num frames: 1511727104. Throughput: 0: 10954.5. Samples: 127929030. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:53,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:54,793][626795] Updated weights for policy 0, policy_version 184542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:56,602][626795] Updated weights for policy 0, policy_version 184552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:39:58,289][626795] Updated weights for policy 0, policy_version 184562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:58,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44509.9, 300 sec: 43070.5). Total num frames: 1511956480. Throughput: 0: 10941.3. Samples: 127963908. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:39:58,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:00,047][626795] Updated weights for policy 0, policy_version 184572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:01,701][626795] Updated weights for policy 0, policy_version 184582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:03,518][626795] Updated weights for policy 0, policy_version 184592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:03,975][24592] Fps is (10 sec: 47516.0, 60 sec: 44646.8, 300 sec: 43098.3). Total num frames: 1512202240. Throughput: 0: 10962.1. Samples: 128035392. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:03,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:05,149][626795] Updated weights for policy 0, policy_version 184602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:08,976][24592] Fps is (10 sec: 32767.1, 60 sec: 42188.5, 300 sec: 42570.6). Total num frames: 1512284160. Throughput: 0: 10202.9. Samples: 128072598. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:08,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:10,112][626795] Updated weights for policy 0, policy_version 184612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:11,853][626795] Updated weights for policy 0, policy_version 184622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:13,605][626795] Updated weights for policy 0, policy_version 184632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:13,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42188.8, 300 sec: 42570.6). Total num frames: 1512521728. Throughput: 0: 10132.8. Samples: 128104428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:13,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:15,292][626795] Updated weights for policy 0, policy_version 184642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:17,059][626795] Updated weights for policy 0, policy_version 184652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:18,724][626795] Updated weights for policy 0, policy_version 184662 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:18,976][24592] Fps is (10 sec: 47513.7, 60 sec: 42188.7, 300 sec: 43037.6). Total num frames: 1512759296. Throughput: 0: 10716.3. Samples: 128174682. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:18,977][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:20,532][626795] Updated weights for policy 0, policy_version 184672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:22,322][626795] Updated weights for policy 0, policy_version 184682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:23,976][24592] Fps is (10 sec: 46694.0, 60 sec: 42188.9, 300 sec: 43098.2). Total num frames: 1512988672. Throughput: 0: 10973.2. Samples: 128245710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:23,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:23,981][626795] Updated weights for policy 0, policy_version 184692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:25,756][626795] Updated weights for policy 0, policy_version 184702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:27,472][626795] Updated weights for policy 0, policy_version 184712 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:28,975][24592] Fps is (10 sec: 47514.9, 60 sec: 42325.3, 300 sec: 43181.6). Total num frames: 1513234432. Throughput: 0: 10966.0. Samples: 128280708. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:28,978][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:29,259][626795] Updated weights for policy 0, policy_version 184722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:30,943][626795] Updated weights for policy 0, policy_version 184732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:32,611][626795] Updated weights for policy 0, policy_version 184742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:33,975][24592] Fps is (10 sec: 47514.2, 60 sec: 44646.4, 300 sec: 43153.8). Total num frames: 1513463808. Throughput: 0: 10981.9. Samples: 128351874. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:33,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:34,382][626795] Updated weights for policy 0, policy_version 184752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:36,203][626795] Updated weights for policy 0, policy_version 184762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:37,797][626795] Updated weights for policy 0, policy_version 184772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:38,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44782.9, 300 sec: 43153.8). Total num frames: 1513701376. Throughput: 0: 10982.8. Samples: 128423250. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:38,976][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:39,644][626795] Updated weights for policy 0, policy_version 184782 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:44,391][24592] Fps is (10 sec: 33031.8, 60 sec: 42305.1, 300 sec: 42677.0). Total num frames: 1513807872. Throughput: 0: 10114.1. Samples: 128423250. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:44,392][24592] Avg episode reward: [(0, '5.096')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:44,602][626795] Updated weights for policy 0, policy_version 184792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:46,274][626795] Updated weights for policy 0, policy_version 184802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:48,067][626795] Updated weights for policy 0, policy_version 184812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:48,976][24592] Fps is (10 sec: 31947.6, 60 sec: 42188.5, 300 sec: 42709.4). Total num frames: 1514020864. Throughput: 0: 10127.4. Samples: 128491128. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:48,977][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:49,801][626795] Updated weights for policy 0, policy_version 184822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:51,542][626795] Updated weights for policy 0, policy_version 184832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:53,388][626795] Updated weights for policy 0, policy_version 184842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:53,975][24592] Fps is (10 sec: 46157.7, 60 sec: 42052.6, 300 sec: 43009.7). Total num frames: 1514250240. Throughput: 0: 10854.9. Samples: 128561064. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:53,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:55,046][626795] Updated weights for policy 0, policy_version 184852 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:56,801][626795] Updated weights for policy 0, policy_version 184862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:40:58,531][626795] Updated weights for policy 0, policy_version 184872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:58,975][24592] Fps is (10 sec: 46696.4, 60 sec: 42188.8, 300 sec: 43042.7). Total num frames: 1514487808. Throughput: 0: 10929.6. Samples: 128596260. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:40:58,976][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:00,278][626795] Updated weights for policy 0, policy_version 184882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:01,919][626795] Updated weights for policy 0, policy_version 184892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:03,774][626795] Updated weights for policy 0, policy_version 184902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:03,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42052.3, 300 sec: 43042.7). Total num frames: 1514725376. Throughput: 0: 10939.8. Samples: 128666970. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:03,976][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:03,998][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000184904_1514733568.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000183652_1504477184.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:05,455][626795] Updated weights for policy 0, policy_version 184912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:07,205][626795] Updated weights for policy 0, policy_version 184922 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:08,976][24592] Fps is (10 sec: 46691.9, 60 sec: 44509.7, 300 sec: 43014.9). Total num frames: 1514954752. Throughput: 0: 10938.8. Samples: 128737962. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:08,977][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:09,009][626795] Updated weights for policy 0, policy_version 184932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:10,645][626795] Updated weights for policy 0, policy_version 184942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:12,347][626795] Updated weights for policy 0, policy_version 184952 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:13,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44646.4, 300 sec: 43042.7). Total num frames: 1515200512. Throughput: 0: 10954.8. Samples: 128773674. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:13,978][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:14,187][626795] Updated weights for policy 0, policy_version 184962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:15,867][626795] Updated weights for policy 0, policy_version 184972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:19,841][24592] Fps is (10 sec: 33927.9, 60 sec: 42127.2, 300 sec: 42529.1). Total num frames: 1515323392. Throughput: 0: 9966.4. Samples: 128808990. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:19,842][24592] Avg episode reward: [(0, '4.951')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:20,749][626795] Updated weights for policy 0, policy_version 184982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:22,507][626795] Updated weights for policy 0, policy_version 184992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:23,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42188.9, 300 sec: 42487.3). Total num frames: 1515520000. Throughput: 0: 10090.7. Samples: 128877330. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:23,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:24,248][626795] Updated weights for policy 0, policy_version 185002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:25,978][626795] Updated weights for policy 0, policy_version 185012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:27,770][626795] Updated weights for policy 0, policy_version 185022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:28,975][24592] Fps is (10 sec: 47533.6, 60 sec: 42052.3, 300 sec: 43014.0). Total num frames: 1515757568. Throughput: 0: 10984.4. Samples: 128912976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:28,977][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:29,503][626795] Updated weights for policy 0, policy_version 185032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:31,137][626795] Updated weights for policy 0, policy_version 185042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:32,975][626795] Updated weights for policy 0, policy_version 185052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:33,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42188.8, 300 sec: 43042.7). Total num frames: 1515995136. Throughput: 0: 10950.9. Samples: 128983914. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:33,976][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:34,690][626795] Updated weights for policy 0, policy_version 185062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:36,364][626795] Updated weights for policy 0, policy_version 185072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:38,100][626795] Updated weights for policy 0, policy_version 185082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:38,976][24592] Fps is (10 sec: 46693.2, 60 sec: 42052.1, 300 sec: 43014.9). Total num frames: 1516224512. Throughput: 0: 10970.2. Samples: 129054726. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:38,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:39,853][626795] Updated weights for policy 0, policy_version 185092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:41,600][626795] Updated weights for policy 0, policy_version 185102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:43,287][626795] Updated weights for policy 0, policy_version 185112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44683.3, 300 sec: 43042.8). Total num frames: 1516470272. Throughput: 0: 10988.9. Samples: 129090762. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:43,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:45,043][626795] Updated weights for policy 0, policy_version 185122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:46,817][626795] Updated weights for policy 0, policy_version 185132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:48,368][626795] Updated weights for policy 0, policy_version 185142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:48,975][24592] Fps is (10 sec: 48333.9, 60 sec: 44783.2, 300 sec: 43042.7). Total num frames: 1516707840. Throughput: 0: 10993.7. Samples: 129161688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:48,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:50,208][626795] Updated weights for policy 0, policy_version 185152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:51,982][626795] Updated weights for policy 0, policy_version 185162 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:55,291][24592] Fps is (10 sec: 33302.0, 60 sec: 42352.4, 300 sec: 42547.5). Total num frames: 1516847104. Throughput: 0: 9916.9. Samples: 129197262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:55,293][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:56,886][626795] Updated weights for policy 0, policy_version 185172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:41:58,531][626795] Updated weights for policy 0, policy_version 185182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:58,978][24592] Fps is (10 sec: 31940.6, 60 sec: 42323.5, 300 sec: 42542.6). Total num frames: 1517027328. Throughput: 0: 10148.8. Samples: 129230394. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:41:58,979][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:00,369][626795] Updated weights for policy 0, policy_version 185192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:02,148][626795] Updated weights for policy 0, policy_version 185202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:03,827][626795] Updated weights for policy 0, policy_version 185212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:03,975][24592] Fps is (10 sec: 47165.1, 60 sec: 42188.8, 300 sec: 43020.9). Total num frames: 1517256704. Throughput: 0: 11145.1. Samples: 129300870. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:03,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:05,495][626795] Updated weights for policy 0, policy_version 185222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:07,316][626795] Updated weights for policy 0, policy_version 185232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:08,975][24592] Fps is (10 sec: 46706.6, 60 sec: 42325.7, 300 sec: 43070.5). Total num frames: 1517494272. Throughput: 0: 11000.8. Samples: 129372366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:08,976][24592] Avg episode reward: [(0, '4.968')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:09,006][626795] Updated weights for policy 0, policy_version 185242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:10,725][626795] Updated weights for policy 0, policy_version 185252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:12,469][626795] Updated weights for policy 0, policy_version 185262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:13,975][24592] Fps is (10 sec: 48332.2, 60 sec: 42325.3, 300 sec: 43098.3). Total num frames: 1517740032. Throughput: 0: 10994.2. Samples: 129407718. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:13,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:14,173][626795] Updated weights for policy 0, policy_version 185272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:15,839][626795] Updated weights for policy 0, policy_version 185282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:17,656][626795] Updated weights for policy 0, policy_version 185292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:18,975][24592] Fps is (10 sec: 48332.8, 60 sec: 44884.6, 300 sec: 43126.0). Total num frames: 1517977600. Throughput: 0: 11000.3. Samples: 129478926. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:18,976][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:19,446][626795] Updated weights for policy 0, policy_version 185302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:21,004][626795] Updated weights for policy 0, policy_version 185312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:22,893][626795] Updated weights for policy 0, policy_version 185322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:23,976][24592] Fps is (10 sec: 46694.2, 60 sec: 44782.8, 300 sec: 43070.5). Total num frames: 1518206976. Throughput: 0: 11002.3. Samples: 129549828. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:23,978][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:24,526][626795] Updated weights for policy 0, policy_version 185332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:26,221][626795] Updated weights for policy 0, policy_version 185342 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:30,746][24592] Fps is (10 sec: 34798.4, 60 sec: 42570.9, 300 sec: 42620.3). Total num frames: 1518387200. Throughput: 0: 10578.3. Samples: 129585516. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:30,748][24592] Avg episode reward: [(0, '4.996')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:31,248][626795] Updated weights for policy 0, policy_version 185352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:32,890][626795] Updated weights for policy 0, policy_version 185362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:33,975][24592] Fps is (10 sec: 32768.4, 60 sec: 42325.3, 300 sec: 42598.4). Total num frames: 1518534656. Throughput: 0: 10158.3. Samples: 129618810. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:33,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:34,599][626795] Updated weights for policy 0, policy_version 185372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:36,450][626795] Updated weights for policy 0, policy_version 185382 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:38,044][626795] Updated weights for policy 0, policy_version 185392 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:38,976][24592] Fps is (10 sec: 45790.5, 60 sec: 42325.4, 300 sec: 43059.0). Total num frames: 1518764032. Throughput: 0: 11273.4. Samples: 129689736. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:38,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:39,895][626795] Updated weights for policy 0, policy_version 185402 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:41,600][626795] Updated weights for policy 0, policy_version 185412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:43,303][626795] Updated weights for policy 0, policy_version 185422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42325.3, 300 sec: 43126.0). Total num frames: 1519009792. Throughput: 0: 10998.2. Samples: 129725286. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:43,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:45,031][626795] Updated weights for policy 0, policy_version 185432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:46,819][626795] Updated weights for policy 0, policy_version 185442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:48,538][626795] Updated weights for policy 0, policy_version 185452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:48,975][24592] Fps is (10 sec: 47514.2, 60 sec: 42188.8, 300 sec: 43098.3). Total num frames: 1519239168. Throughput: 0: 11002.1. Samples: 129795966. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:48,977][24592] Avg episode reward: [(0, '4.971')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:50,260][626795] Updated weights for policy 0, policy_version 185462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:52,016][626795] Updated weights for policy 0, policy_version 185472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:53,817][626795] Updated weights for policy 0, policy_version 185482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:53,976][24592] Fps is (10 sec: 46692.0, 60 sec: 44809.3, 300 sec: 43098.2). Total num frames: 1519476736. Throughput: 0: 10978.7. Samples: 129866412. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:53,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:55,418][626795] Updated weights for policy 0, policy_version 185492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:57,132][626795] Updated weights for policy 0, policy_version 185502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:42:58,964][626795] Updated weights for policy 0, policy_version 185512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:58,975][24592] Fps is (10 sec: 47513.2, 60 sec: 44784.8, 300 sec: 43126.0). Total num frames: 1519714304. Throughput: 0: 10998.8. Samples: 129902664. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:42:58,976][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:00,558][626795] Updated weights for policy 0, policy_version 185522 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:02,328][626795] Updated weights for policy 0, policy_version 185532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:06,188][24592] Fps is (10 sec: 34881.0, 60 sec: 42531.5, 300 sec: 42639.5). Total num frames: 1519902720. Throughput: 0: 10483.6. Samples: 129973890. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:06,190][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:06,240][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000185536_1519910912.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:06,286][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000184268_1509523456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:07,358][626795] Updated weights for policy 0, policy_version 185542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:08,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42325.2, 300 sec: 42598.5). Total num frames: 1520033792. Throughput: 0: 10161.1. Samples: 130007076. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:08,976][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:09,018][626795] Updated weights for policy 0, policy_version 185552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:10,777][626795] Updated weights for policy 0, policy_version 185562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:12,587][626795] Updated weights for policy 0, policy_version 185572 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:13,975][24592] Fps is (10 sec: 47340.8, 60 sec: 42188.9, 300 sec: 42598.4). Total num frames: 1520271360. Throughput: 0: 10559.6. Samples: 130042002. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:13,976][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:14,178][626795] Updated weights for policy 0, policy_version 185582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:15,997][626795] Updated weights for policy 0, policy_version 185592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:17,725][626795] Updated weights for policy 0, policy_version 185602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:18,975][24592] Fps is (10 sec: 47514.5, 60 sec: 42188.8, 300 sec: 43126.0). Total num frames: 1520508928. Throughput: 0: 10980.0. Samples: 130112910. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:18,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:19,398][626795] Updated weights for policy 0, policy_version 185612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:21,170][626795] Updated weights for policy 0, policy_version 185622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:22,842][626795] Updated weights for policy 0, policy_version 185632 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:23,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.4, 300 sec: 43126.1). Total num frames: 1520746496. Throughput: 0: 10985.2. Samples: 130184070. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:23,976][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:24,591][626795] Updated weights for policy 0, policy_version 185642 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:26,335][626795] Updated weights for policy 0, policy_version 185652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:28,086][626795] Updated weights for policy 0, policy_version 185662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:28,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44597.2, 300 sec: 43126.1). Total num frames: 1520984064. Throughput: 0: 10982.5. Samples: 130219500. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:28,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:29,743][626795] Updated weights for policy 0, policy_version 185672 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:31,555][626795] Updated weights for policy 0, policy_version 185682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:33,349][626795] Updated weights for policy 0, policy_version 185692 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:33,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44646.4, 300 sec: 43126.1). Total num frames: 1521213440. Throughput: 0: 10982.5. Samples: 130290180. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:33,978][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:35,053][626795] Updated weights for policy 0, policy_version 185702 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:36,705][626795] Updated weights for policy 0, policy_version 185712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:41,638][24592] Fps is (10 sec: 34934.5, 60 sec: 42487.8, 300 sec: 42657.7). Total num frames: 1521426432. Throughput: 0: 9644.4. Samples: 130326084. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:41,639][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:41,740][626795] Updated weights for policy 0, policy_version 185722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:43,378][626795] Updated weights for policy 0, policy_version 185732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:43,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42188.8, 300 sec: 42626.2). Total num frames: 1521541120. Throughput: 0: 10151.4. Samples: 130359474. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:43,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:45,127][626795] Updated weights for policy 0, policy_version 185742 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:46,913][626795] Updated weights for policy 0, policy_version 185752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:48,630][626795] Updated weights for policy 0, policy_version 185762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:48,988][24592] Fps is (10 sec: 46812.6, 60 sec: 42180.0, 300 sec: 42596.7). Total num frames: 1521770496. Throughput: 0: 10653.5. Samples: 130429854. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:48,989][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:50,429][626795] Updated weights for policy 0, policy_version 185772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:52,129][626795] Updated weights for policy 0, policy_version 185782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:53,838][626795] Updated weights for policy 0, policy_version 185792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:53,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42189.2, 300 sec: 43126.0). Total num frames: 1522008064. Throughput: 0: 10955.6. Samples: 130500078. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:53,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:55,596][626795] Updated weights for policy 0, policy_version 185802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:57,390][626795] Updated weights for policy 0, policy_version 185812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:58,975][24592] Fps is (10 sec: 47573.6, 60 sec: 42188.9, 300 sec: 43126.1). Total num frames: 1522245632. Throughput: 0: 10977.2. Samples: 130535976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:43:58,976][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:43:59,002][626795] Updated weights for policy 0, policy_version 185822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:00,772][626795] Updated weights for policy 0, policy_version 185832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:02,515][626795] Updated weights for policy 0, policy_version 185842 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:03,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44513.3, 300 sec: 43126.0). Total num frames: 1522475008. Throughput: 0: 10976.9. Samples: 130606872. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:03,976][24592] Avg episode reward: [(0, '5.003')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:04,202][626795] Updated weights for policy 0, policy_version 185852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:05,967][626795] Updated weights for policy 0, policy_version 185862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:07,629][626795] Updated weights for policy 0, policy_version 185872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:08,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44783.0, 300 sec: 43153.8). Total num frames: 1522720768. Throughput: 0: 10981.9. Samples: 130678254. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:08,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:09,433][626795] Updated weights for policy 0, policy_version 185882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:11,158][626795] Updated weights for policy 0, policy_version 185892 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:12,911][626795] Updated weights for policy 0, policy_version 185902 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:17,089][24592] Fps is (10 sec: 36857.2, 60 sec: 42573.7, 300 sec: 42703.1). Total num frames: 1522958336. Throughput: 0: 10272.5. Samples: 130713744. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:17,090][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:17,761][626795] Updated weights for policy 0, policy_version 185912 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:18,975][24592] Fps is (10 sec: 31948.5, 60 sec: 42188.7, 300 sec: 42654.0). Total num frames: 1523040256. Throughput: 0: 10156.4. Samples: 130747218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:18,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:19,563][626795] Updated weights for policy 0, policy_version 185922 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:21,196][626795] Updated weights for policy 0, policy_version 185932 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:23,050][626795] Updated weights for policy 0, policy_version 185942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:23,975][24592] Fps is (10 sec: 46393.6, 60 sec: 42188.8, 300 sec: 42653.9). Total num frames: 1523277824. Throughput: 0: 11608.8. Samples: 130817568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:23,977][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:24,680][626795] Updated weights for policy 0, policy_version 185952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:26,536][626795] Updated weights for policy 0, policy_version 185962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:28,126][626795] Updated weights for policy 0, policy_version 185972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:28,976][24592] Fps is (10 sec: 47510.9, 60 sec: 42188.3, 300 sec: 43153.7). Total num frames: 1523515392. Throughput: 0: 10970.0. Samples: 130853130. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:28,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:29,995][626795] Updated weights for policy 0, policy_version 185982 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:31,639][626795] Updated weights for policy 0, policy_version 185992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:33,406][626795] Updated weights for policy 0, policy_version 186002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:33,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.3, 300 sec: 43181.6). Total num frames: 1523752960. Throughput: 0: 10988.9. Samples: 130924218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:33,977][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:35,049][626795] Updated weights for policy 0, policy_version 186012 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:36,792][626795] Updated weights for policy 0, policy_version 186022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:38,526][626795] Updated weights for policy 0, policy_version 186032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:38,975][24592] Fps is (10 sec: 47516.7, 60 sec: 44719.6, 300 sec: 43181.6). Total num frames: 1523990528. Throughput: 0: 11012.5. Samples: 130995642. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:38,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:40,300][626795] Updated weights for policy 0, policy_version 186042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:41,985][626795] Updated weights for policy 0, policy_version 186052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:43,730][626795] Updated weights for policy 0, policy_version 186062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:43,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44782.9, 300 sec: 43181.6). Total num frames: 1524228096. Throughput: 0: 11009.5. Samples: 131031402. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:43,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:45,447][626795] Updated weights for policy 0, policy_version 186072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:47,213][626795] Updated weights for policy 0, policy_version 186082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:48,904][626795] Updated weights for policy 0, policy_version 186092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:48,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44928.9, 300 sec: 43181.6). Total num frames: 1524465664. Throughput: 0: 10994.1. Samples: 131101608. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:48,976][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:53,849][626795] Updated weights for policy 0, policy_version 186102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:53,976][24592] Fps is (10 sec: 31948.1, 60 sec: 42325.2, 300 sec: 42681.7). Total num frames: 1524547584. Throughput: 0: 10158.3. Samples: 131135382. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:53,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:55,647][626795] Updated weights for policy 0, policy_version 186112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:57,404][626795] Updated weights for policy 0, policy_version 186122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:58,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42325.3, 300 sec: 42653.9). Total num frames: 1524785152. Throughput: 0: 10891.1. Samples: 131169936. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:44:58,978][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:44:59,107][626795] Updated weights for policy 0, policy_version 186132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:00,758][626795] Updated weights for policy 0, policy_version 186142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:02,542][626795] Updated weights for policy 0, policy_version 186152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:03,975][24592] Fps is (10 sec: 47514.2, 60 sec: 42461.8, 300 sec: 43181.6). Total num frames: 1525022720. Throughput: 0: 10978.3. Samples: 131241240. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:03,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000186160_1525022720.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:04,034][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000184904_1514733568.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:04,370][626795] Updated weights for policy 0, policy_version 186162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:06,157][626795] Updated weights for policy 0, policy_version 186172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:07,813][626795] Updated weights for policy 0, policy_version 186182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:08,976][24592] Fps is (10 sec: 46690.6, 60 sec: 42188.2, 300 sec: 43153.7). Total num frames: 1525252096. Throughput: 0: 10970.2. Samples: 131311236. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:08,978][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:09,541][626795] Updated weights for policy 0, policy_version 186192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:11,270][626795] Updated weights for policy 0, policy_version 186202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:13,097][626795] Updated weights for policy 0, policy_version 186212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:13,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44497.8, 300 sec: 43153.8). Total num frames: 1525489664. Throughput: 0: 10962.9. Samples: 131346456. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:13,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:14,708][626795] Updated weights for policy 0, policy_version 186222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:16,488][626795] Updated weights for policy 0, policy_version 186232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:18,145][626795] Updated weights for policy 0, policy_version 186242 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:18,975][24592] Fps is (10 sec: 47517.3, 60 sec: 44782.9, 300 sec: 43181.6). Total num frames: 1525727232. Throughput: 0: 10971.5. Samples: 131417934. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:18,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:19,889][626795] Updated weights for policy 0, policy_version 186252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:21,711][626795] Updated weights for policy 0, policy_version 186262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:23,431][626795] Updated weights for policy 0, policy_version 186272 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:23,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44782.9, 300 sec: 43153.8). Total num frames: 1525964800. Throughput: 0: 10945.6. Samples: 131488194. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:23,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:28,343][626795] Updated weights for policy 0, policy_version 186282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:28,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42189.3, 300 sec: 42653.9). Total num frames: 1526046720. Throughput: 0: 10450.3. Samples: 131501664. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:28,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:30,102][626795] Updated weights for policy 0, policy_version 186292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:31,792][626795] Updated weights for policy 0, policy_version 186302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:33,526][626795] Updated weights for policy 0, policy_version 186312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:33,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42188.8, 300 sec: 42653.9). Total num frames: 1526284288. Throughput: 0: 10113.9. Samples: 131556732. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:33,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:35,201][626795] Updated weights for policy 0, policy_version 186322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:37,042][626795] Updated weights for policy 0, policy_version 186332 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:38,837][626795] Updated weights for policy 0, policy_version 186342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42188.8, 300 sec: 43159.1). Total num frames: 1526521856. Throughput: 0: 10942.5. Samples: 131627790. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:38,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:40,480][626795] Updated weights for policy 0, policy_version 186352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:42,131][626795] Updated weights for policy 0, policy_version 186362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:43,911][626795] Updated weights for policy 0, policy_version 186372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:43,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42188.8, 300 sec: 43181.6). Total num frames: 1526759424. Throughput: 0: 10964.7. Samples: 131663346. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:43,976][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:45,662][626795] Updated weights for policy 0, policy_version 186382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:47,368][626795] Updated weights for policy 0, policy_version 186392 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:48,976][24592] Fps is (10 sec: 47512.7, 60 sec: 42188.7, 300 sec: 43209.3). Total num frames: 1526996992. Throughput: 0: 10980.5. Samples: 131735364. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:48,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:49,126][626795] Updated weights for policy 0, policy_version 186402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:50,848][626795] Updated weights for policy 0, policy_version 186412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:52,540][626795] Updated weights for policy 0, policy_version 186422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:53,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44783.1, 300 sec: 43209.3). Total num frames: 1527234560. Throughput: 0: 10978.1. Samples: 131805240. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:53,977][24592] Avg episode reward: [(0, '4.981')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:54,311][626795] Updated weights for policy 0, policy_version 186432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:56,091][626795] Updated weights for policy 0, policy_version 186442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:57,873][626795] Updated weights for policy 0, policy_version 186452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:58,976][24592] Fps is (10 sec: 46694.0, 60 sec: 44646.2, 300 sec: 43181.5). Total num frames: 1527463936. Throughput: 0: 10991.0. Samples: 131841054. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:45:58,976][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:45:59,594][626795] Updated weights for policy 0, policy_version 186462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:03,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42052.3, 300 sec: 42681.8). Total num frames: 1527545856. Throughput: 0: 10322.0. Samples: 131882424. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:03,976][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:04,521][626795] Updated weights for policy 0, policy_version 186472 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:06,300][626795] Updated weights for policy 0, policy_version 186482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:07,946][626795] Updated weights for policy 0, policy_version 186492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:08,976][24592] Fps is (10 sec: 31948.9, 60 sec: 42189.2, 300 sec: 42653.9). Total num frames: 1527783424. Throughput: 0: 10136.5. Samples: 131944338. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:08,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:09,795][626795] Updated weights for policy 0, policy_version 186502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:11,457][626795] Updated weights for policy 0, policy_version 186512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:13,254][626795] Updated weights for policy 0, policy_version 186522 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:13,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42188.9, 300 sec: 43169.4). Total num frames: 1528020992. Throughput: 0: 10615.7. Samples: 131979372. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:13,976][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:14,901][626795] Updated weights for policy 0, policy_version 186532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:16,658][626795] Updated weights for policy 0, policy_version 186542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:18,389][626795] Updated weights for policy 0, policy_version 186552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:18,975][24592] Fps is (10 sec: 47514.6, 60 sec: 42188.8, 300 sec: 43181.5). Total num frames: 1528258560. Throughput: 0: 10967.1. Samples: 132050250. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:18,978][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:20,158][626795] Updated weights for policy 0, policy_version 186562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:21,792][626795] Updated weights for policy 0, policy_version 186572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:23,517][626795] Updated weights for policy 0, policy_version 186582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:23,976][24592] Fps is (10 sec: 47512.7, 60 sec: 42188.7, 300 sec: 43181.5). Total num frames: 1528496128. Throughput: 0: 10976.6. Samples: 132121740. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:23,976][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:25,345][626795] Updated weights for policy 0, policy_version 186592 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:27,032][626795] Updated weights for policy 0, policy_version 186602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:28,751][626795] Updated weights for policy 0, policy_version 186612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:28,982][24592] Fps is (10 sec: 47483.8, 60 sec: 44778.2, 300 sec: 43180.6). Total num frames: 1528733696. Throughput: 0: 10957.4. Samples: 132156498. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:28,983][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:29,294][626772] Signal inference workers to stop experience collection... (1850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:29,295][626772] Signal inference workers to resume experience collection... (1850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:29,302][626795] InferenceWorker_p0-w0: stopping experience collection (1850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:29,308][626795] InferenceWorker_p0-w0: resuming experience collection (1850 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:30,548][626795] Updated weights for policy 0, policy_version 186622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:32,237][626795] Updated weights for policy 0, policy_version 186632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:33,975][24592] Fps is (10 sec: 46695.2, 60 sec: 44646.4, 300 sec: 43181.6). Total num frames: 1528963072. Throughput: 0: 10934.2. Samples: 132227400. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:33,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:34,040][626795] Updated weights for policy 0, policy_version 186642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:38,975][24592] Fps is (10 sec: 31149.0, 60 sec: 42052.2, 300 sec: 42626.2). Total num frames: 1529044992. Throughput: 0: 10161.3. Samples: 132262500. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:38,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:39,041][626795] Updated weights for policy 0, policy_version 186652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:40,728][626795] Updated weights for policy 0, policy_version 186662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:42,508][626795] Updated weights for policy 0, policy_version 186672 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:43,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42052.2, 300 sec: 42626.2). Total num frames: 1529282560. Throughput: 0: 10081.5. Samples: 132294720. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:43,978][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:44,356][626795] Updated weights for policy 0, policy_version 186682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:45,926][626795] Updated weights for policy 0, policy_version 186692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:47,797][626795] Updated weights for policy 0, policy_version 186702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:48,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42052.4, 300 sec: 43151.8). Total num frames: 1529520128. Throughput: 0: 10721.9. Samples: 132364908. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:48,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:49,537][626795] Updated weights for policy 0, policy_version 186712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:51,240][626795] Updated weights for policy 0, policy_version 186722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:52,998][626795] Updated weights for policy 0, policy_version 186732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:53,976][24592] Fps is (10 sec: 46693.8, 60 sec: 41915.6, 300 sec: 43126.4). Total num frames: 1529749504. Throughput: 0: 10926.9. Samples: 132436050. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:53,976][24592] Avg episode reward: [(0, '4.953')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:54,676][626795] Updated weights for policy 0, policy_version 186742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:56,400][626795] Updated weights for policy 0, policy_version 186752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:58,156][626795] Updated weights for policy 0, policy_version 186762 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42052.5, 300 sec: 43153.8). Total num frames: 1529987072. Throughput: 0: 10931.1. Samples: 132471270. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:46:58,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:46:59,903][626795] Updated weights for policy 0, policy_version 186772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:01,600][626795] Updated weights for policy 0, policy_version 186782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:03,400][626795] Updated weights for policy 0, policy_version 186792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:03,976][24592] Fps is (10 sec: 47510.1, 60 sec: 44645.7, 300 sec: 43153.6). Total num frames: 1530224640. Throughput: 0: 10934.3. Samples: 132542304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:03,978][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000186795_1530224640.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:04,034][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000185536_1519910912.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:05,185][626795] Updated weights for policy 0, policy_version 186802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:06,828][626795] Updated weights for policy 0, policy_version 186812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:08,603][626795] Updated weights for policy 0, policy_version 186822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:08,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44646.6, 300 sec: 43126.0). Total num frames: 1530462208. Throughput: 0: 10906.6. Samples: 132612534. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:08,976][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:10,411][626795] Updated weights for policy 0, policy_version 186832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:14,310][24592] Fps is (10 sec: 32501.6, 60 sec: 42090.4, 300 sec: 42605.6). Total num frames: 1530560512. Throughput: 0: 10060.7. Samples: 132612534. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:14,312][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:15,325][626795] Updated weights for policy 0, policy_version 186842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:17,099][626795] Updated weights for policy 0, policy_version 186852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:18,817][626795] Updated weights for policy 0, policy_version 186862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:18,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42052.3, 300 sec: 42626.2). Total num frames: 1530781696. Throughput: 0: 10063.3. Samples: 132680250. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:18,976][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:20,496][626795] Updated weights for policy 0, policy_version 186872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:22,301][626795] Updated weights for policy 0, policy_version 186882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:23,975][24592] Fps is (10 sec: 46617.7, 60 sec: 41915.9, 300 sec: 43051.2). Total num frames: 1531011072. Throughput: 0: 10867.0. Samples: 132751512. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:23,976][24592] Avg episode reward: [(0, '4.969')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:24,047][626795] Updated weights for policy 0, policy_version 186892 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:25,690][626795] Updated weights for policy 0, policy_version 186902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:27,481][626795] Updated weights for policy 0, policy_version 186912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:28,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42056.7, 300 sec: 43126.0). Total num frames: 1531256832. Throughput: 0: 10927.4. Samples: 132786450. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:28,978][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:29,263][626795] Updated weights for policy 0, policy_version 186922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:30,878][626795] Updated weights for policy 0, policy_version 186932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:32,595][626795] Updated weights for policy 0, policy_version 186942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:33,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42052.3, 300 sec: 43126.0). Total num frames: 1531486208. Throughput: 0: 10962.4. Samples: 132858216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:33,978][24592] Avg episode reward: [(0, '5.078')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:34,317][626795] Updated weights for policy 0, policy_version 186952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:36,020][626795] Updated weights for policy 0, policy_version 186962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:37,831][626795] Updated weights for policy 0, policy_version 186972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:38,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44646.5, 300 sec: 43098.3). Total num frames: 1531723776. Throughput: 0: 10955.4. Samples: 132929040. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:38,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:39,643][626795] Updated weights for policy 0, policy_version 186982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:41,391][626795] Updated weights for policy 0, policy_version 186992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:42,971][626795] Updated weights for policy 0, policy_version 187002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44646.5, 300 sec: 43126.0). Total num frames: 1531961344. Throughput: 0: 10955.7. Samples: 132964278. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:43,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:44,795][626795] Updated weights for policy 0, policy_version 187012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:46,489][626795] Updated weights for policy 0, policy_version 187022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:49,825][24592] Fps is (10 sec: 33220.6, 60 sec: 42137.9, 300 sec: 42614.5). Total num frames: 1532084224. Throughput: 0: 9975.0. Samples: 132999648. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:49,827][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:51,553][626795] Updated weights for policy 0, policy_version 187032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:53,222][626795] Updated weights for policy 0, policy_version 187042 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:53,977][24592] Fps is (10 sec: 31125.6, 60 sec: 42051.5, 300 sec: 42570.5). Total num frames: 1532272640. Throughput: 0: 10096.9. Samples: 133066908. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:53,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:55,021][626795] Updated weights for policy 0, policy_version 187052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:56,875][626795] Updated weights for policy 0, policy_version 187062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:47:58,650][626795] Updated weights for policy 0, policy_version 187072 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:58,976][24592] Fps is (10 sec: 46554.0, 60 sec: 42051.9, 300 sec: 43060.2). Total num frames: 1532510208. Throughput: 0: 10943.2. Samples: 133101318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:47:58,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:00,302][626795] Updated weights for policy 0, policy_version 187082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:02,043][626795] Updated weights for policy 0, policy_version 187092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:03,837][626795] Updated weights for policy 0, policy_version 187102 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:03,975][24592] Fps is (10 sec: 47519.7, 60 sec: 42052.9, 300 sec: 43098.3). Total num frames: 1532747776. Throughput: 0: 10924.3. Samples: 133171842. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:03,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:05,501][626795] Updated weights for policy 0, policy_version 187112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:07,195][626795] Updated weights for policy 0, policy_version 187122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:08,899][626795] Updated weights for policy 0, policy_version 187132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:08,977][24592] Fps is (10 sec: 47509.7, 60 sec: 42051.3, 300 sec: 43098.1). Total num frames: 1532985344. Throughput: 0: 10952.0. Samples: 133244364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:08,979][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:10,569][626795] Updated weights for policy 0, policy_version 187142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:12,203][626795] Updated weights for policy 0, policy_version 187152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:13,922][626795] Updated weights for policy 0, policy_version 187162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:13,975][24592] Fps is (10 sec: 48332.8, 60 sec: 44759.8, 300 sec: 43126.0). Total num frames: 1533231104. Throughput: 0: 10977.5. Samples: 133280436. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:13,977][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:15,760][626795] Updated weights for policy 0, policy_version 187172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:17,458][626795] Updated weights for policy 0, policy_version 187182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:18,975][24592] Fps is (10 sec: 47519.4, 60 sec: 44646.4, 300 sec: 43098.2). Total num frames: 1533460480. Throughput: 0: 10962.0. Samples: 133351506. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:18,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:19,082][626795] Updated weights for policy 0, policy_version 187192 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:20,964][626795] Updated weights for policy 0, policy_version 187202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:25,225][24592] Fps is (10 sec: 33497.2, 60 sec: 42398.0, 300 sec: 42612.3). Total num frames: 1533607936. Throughput: 0: 9905.8. Samples: 133387182. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:25,226][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:25,802][626795] Updated weights for policy 0, policy_version 187212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:27,570][626795] Updated weights for policy 0, policy_version 187222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:28,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42052.2, 300 sec: 42598.4). Total num frames: 1533779968. Throughput: 0: 10149.9. Samples: 133421022. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:28,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:29,312][626795] Updated weights for policy 0, policy_version 187232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:31,132][626795] Updated weights for policy 0, policy_version 187242 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:32,936][626795] Updated weights for policy 0, policy_version 187252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:33,975][24592] Fps is (10 sec: 46809.5, 60 sec: 42188.8, 300 sec: 43070.5). Total num frames: 1534017536. Throughput: 0: 11104.4. Samples: 133489908. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:33,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:34,659][626795] Updated weights for policy 0, policy_version 187262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:36,434][626795] Updated weights for policy 0, policy_version 187272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:38,152][626795] Updated weights for policy 0, policy_version 187282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:38,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42188.7, 300 sec: 43098.2). Total num frames: 1534255104. Throughput: 0: 10965.1. Samples: 133560324. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:38,976][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:39,837][626795] Updated weights for policy 0, policy_version 187292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:41,518][626795] Updated weights for policy 0, policy_version 187302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:43,231][626795] Updated weights for policy 0, policy_version 187312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:43,976][24592] Fps is (10 sec: 47508.8, 60 sec: 42188.1, 300 sec: 43127.7). Total num frames: 1534492672. Throughput: 0: 11008.5. Samples: 133596708. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:43,977][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:44,974][626795] Updated weights for policy 0, policy_version 187322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:46,668][626795] Updated weights for policy 0, policy_version 187332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:48,341][626795] Updated weights for policy 0, policy_version 187342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:48,975][24592] Fps is (10 sec: 47514.2, 60 sec: 44734.1, 300 sec: 43126.0). Total num frames: 1534730240. Throughput: 0: 11026.1. Samples: 133668018. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:48,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:50,100][626795] Updated weights for policy 0, policy_version 187352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:51,944][626795] Updated weights for policy 0, policy_version 187362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:53,520][626795] Updated weights for policy 0, policy_version 187372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:53,975][24592] Fps is (10 sec: 47518.3, 60 sec: 44920.4, 300 sec: 43126.0). Total num frames: 1534967808. Throughput: 0: 10999.6. Samples: 133739334. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:48:53,976][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:55,287][626795] Updated weights for policy 0, policy_version 187382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:48:57,056][626795] Updated weights for policy 0, policy_version 187392 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:00,650][24592] Fps is (10 sec: 34383.7, 60 sec: 42504.9, 300 sec: 42661.7). Total num frames: 1535131648. Throughput: 0: 10593.8. Samples: 133774896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:00,651][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:01,861][626795] Updated weights for policy 0, policy_version 187402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:03,719][626795] Updated weights for policy 0, policy_version 187412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:03,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42325.3, 300 sec: 42598.4). Total num frames: 1535287296. Throughput: 0: 10151.9. Samples: 133808340. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:03,978][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000187413_1535287296.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:04,037][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000186160_1525022720.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:05,624][626795] Updated weights for policy 0, policy_version 187422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:07,412][626795] Updated weights for policy 0, policy_version 187432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:08,975][24592] Fps is (10 sec: 45261.5, 60 sec: 42053.2, 300 sec: 42996.7). Total num frames: 1535508480. Throughput: 0: 11158.3. Samples: 133875360. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:08,976][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:09,122][626795] Updated weights for policy 0, policy_version 187442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:10,938][626795] Updated weights for policy 0, policy_version 187452 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:12,678][626795] Updated weights for policy 0, policy_version 187462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:13,975][24592] Fps is (10 sec: 45875.4, 60 sec: 41915.8, 300 sec: 43070.5). Total num frames: 1535746048. Throughput: 0: 10885.9. Samples: 133910886. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:13,976][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:14,443][626795] Updated weights for policy 0, policy_version 187472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:16,110][626795] Updated weights for policy 0, policy_version 187482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:17,870][626795] Updated weights for policy 0, policy_version 187492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:18,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42052.3, 300 sec: 43070.5). Total num frames: 1535983616. Throughput: 0: 10934.7. Samples: 133981968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:18,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:19,515][626795] Updated weights for policy 0, policy_version 187502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:21,351][626795] Updated weights for policy 0, policy_version 187512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:22,976][626795] Updated weights for policy 0, policy_version 187522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:23,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44480.5, 300 sec: 43070.6). Total num frames: 1536221184. Throughput: 0: 10955.6. Samples: 134053326. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:23,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:24,681][626795] Updated weights for policy 0, policy_version 187532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:26,430][626795] Updated weights for policy 0, policy_version 187542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:28,200][626795] Updated weights for policy 0, policy_version 187552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:28,976][24592] Fps is (10 sec: 47511.7, 60 sec: 44646.1, 300 sec: 43070.4). Total num frames: 1536458752. Throughput: 0: 10946.8. Samples: 134089308. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:28,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:29,925][626795] Updated weights for policy 0, policy_version 187562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:31,602][626795] Updated weights for policy 0, policy_version 187572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:35,882][24592] Fps is (10 sec: 35778.0, 60 sec: 42477.6, 300 sec: 42628.4). Total num frames: 1536647168. Throughput: 0: 10495.5. Samples: 134160324. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:35,883][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:36,323][626795] Updated weights for policy 0, policy_version 187582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:38,071][626795] Updated weights for policy 0, policy_version 187592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:38,975][24592] Fps is (10 sec: 32769.3, 60 sec: 42188.9, 300 sec: 42570.6). Total num frames: 1536786432. Throughput: 0: 10131.1. Samples: 134195232. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:38,977][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:39,925][626795] Updated weights for policy 0, policy_version 187602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:41,736][626795] Updated weights for policy 0, policy_version 187612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:43,593][626795] Updated weights for policy 0, policy_version 187622 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:43,976][24592] Fps is (10 sec: 46557.6, 60 sec: 42189.4, 300 sec: 42570.6). Total num frames: 1537024000. Throughput: 0: 10484.7. Samples: 134229156. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:43,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:45,186][626795] Updated weights for policy 0, policy_version 187632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:46,974][626795] Updated weights for policy 0, policy_version 187642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:48,738][626795] Updated weights for policy 0, policy_version 187652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:48,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42052.2, 300 sec: 43070.5). Total num frames: 1537253376. Throughput: 0: 10911.2. Samples: 134299344. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:48,976][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:50,478][626795] Updated weights for policy 0, policy_version 187662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:52,204][626795] Updated weights for policy 0, policy_version 187672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:53,910][626795] Updated weights for policy 0, policy_version 187682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:53,975][24592] Fps is (10 sec: 46695.5, 60 sec: 42052.3, 300 sec: 43070.5). Total num frames: 1537490944. Throughput: 0: 10999.5. Samples: 134370336. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:53,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:55,668][626795] Updated weights for policy 0, policy_version 187692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:57,382][626795] Updated weights for policy 0, policy_version 187702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:49:58,972][626795] Updated weights for policy 0, policy_version 187712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:58,975][24592] Fps is (10 sec: 48333.0, 60 sec: 44664.0, 300 sec: 43098.3). Total num frames: 1537736704. Throughput: 0: 11001.5. Samples: 134405952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:49:58,977][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:00,841][626795] Updated weights for policy 0, policy_version 187722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:02,529][626795] Updated weights for policy 0, policy_version 187732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:03,976][24592] Fps is (10 sec: 47511.8, 60 sec: 44646.1, 300 sec: 43098.3). Total num frames: 1537966080. Throughput: 0: 11013.8. Samples: 134477592. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:03,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:04,219][626795] Updated weights for policy 0, policy_version 187742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:06,001][626795] Updated weights for policy 0, policy_version 187752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:07,660][626795] Updated weights for policy 0, policy_version 187762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:11,109][24592] Fps is (10 sec: 35106.1, 60 sec: 42717.4, 300 sec: 42650.9). Total num frames: 1538162688. Throughput: 0: 9745.3. Samples: 134512662. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:11,111][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:12,546][626795] Updated weights for policy 0, policy_version 187772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:13,975][24592] Fps is (10 sec: 32769.1, 60 sec: 42461.8, 300 sec: 42598.4). Total num frames: 1538293760. Throughput: 0: 10204.1. Samples: 134548488. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:13,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:14,395][626795] Updated weights for policy 0, policy_version 187782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:16,243][626795] Updated weights for policy 0, policy_version 187792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:18,074][626795] Updated weights for policy 0, policy_version 187802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:18,975][24592] Fps is (10 sec: 44782.9, 60 sec: 42188.8, 300 sec: 42542.9). Total num frames: 1538514944. Throughput: 0: 10549.2. Samples: 134614926. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:18,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:19,930][626795] Updated weights for policy 0, policy_version 187812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:21,568][626795] Updated weights for policy 0, policy_version 187822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:23,350][626795] Updated weights for policy 0, policy_version 187832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:23,975][24592] Fps is (10 sec: 45056.2, 60 sec: 42052.3, 300 sec: 43042.7). Total num frames: 1538744320. Throughput: 0: 10873.3. Samples: 134684532. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:23,978][24592] Avg episode reward: [(0, '5.106')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:25,075][626795] Updated weights for policy 0, policy_version 187842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:26,796][626795] Updated weights for policy 0, policy_version 187852 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:28,511][626795] Updated weights for policy 0, policy_version 187862 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:28,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42052.5, 300 sec: 43042.7). Total num frames: 1538981888. Throughput: 0: 10913.4. Samples: 134720256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:28,978][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:30,246][626795] Updated weights for policy 0, policy_version 187872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:32,001][626795] Updated weights for policy 0, policy_version 187882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:33,617][626795] Updated weights for policy 0, policy_version 187892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:33,976][24592] Fps is (10 sec: 47512.5, 60 sec: 44278.1, 300 sec: 43042.7). Total num frames: 1539219456. Throughput: 0: 10939.0. Samples: 134791602. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:33,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:35,387][626795] Updated weights for policy 0, policy_version 187902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:37,112][626795] Updated weights for policy 0, policy_version 187912 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:38,757][626795] Updated weights for policy 0, policy_version 187922 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:38,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44509.9, 300 sec: 43042.7). Total num frames: 1539457024. Throughput: 0: 10945.2. Samples: 134862870. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:38,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:40,620][626795] Updated weights for policy 0, policy_version 187932 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:42,218][626795] Updated weights for policy 0, policy_version 187942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:46,298][24592] Fps is (10 sec: 36564.5, 60 sec: 42456.9, 300 sec: 42623.8). Total num frames: 1539670016. Throughput: 0: 10408.6. Samples: 134898516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:46,299][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:46,900][626795] Updated weights for policy 0, policy_version 187952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:48,714][626795] Updated weights for policy 0, policy_version 187962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:48,976][24592] Fps is (10 sec: 33586.3, 60 sec: 42325.2, 300 sec: 42570.6). Total num frames: 1539792896. Throughput: 0: 10166.8. Samples: 134935098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:48,978][24592] Avg episode reward: [(0, '4.951')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:50,502][626795] Updated weights for policy 0, policy_version 187972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:52,144][626795] Updated weights for policy 0, policy_version 187982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:53,969][626795] Updated weights for policy 0, policy_version 187992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:53,975][24592] Fps is (10 sec: 46949.1, 60 sec: 42325.3, 300 sec: 42598.4). Total num frames: 1540030464. Throughput: 0: 11470.5. Samples: 135004356. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:53,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:55,848][626795] Updated weights for policy 0, policy_version 188002 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:57,491][626795] Updated weights for policy 0, policy_version 188012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:58,975][24592] Fps is (10 sec: 46695.2, 60 sec: 42052.2, 300 sec: 43098.2). Total num frames: 1540259840. Throughput: 0: 10892.4. Samples: 135038646. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:50:58,976][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:50:59,213][626795] Updated weights for policy 0, policy_version 188022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:00,929][626795] Updated weights for policy 0, policy_version 188032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:02,615][626795] Updated weights for policy 0, policy_version 188042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:03,975][24592] Fps is (10 sec: 46694.0, 60 sec: 42189.0, 300 sec: 43098.3). Total num frames: 1540497408. Throughput: 0: 11015.3. Samples: 135110616. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:03,978][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000188049_1540497408.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000186795_1530224640.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:04,373][626795] Updated weights for policy 0, policy_version 188052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:06,135][626795] Updated weights for policy 0, policy_version 188062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:07,842][626795] Updated weights for policy 0, policy_version 188072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:08,976][24592] Fps is (10 sec: 47511.9, 60 sec: 44452.3, 300 sec: 43098.2). Total num frames: 1540734976. Throughput: 0: 11056.7. Samples: 135182088. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:08,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:09,564][626795] Updated weights for policy 0, policy_version 188082 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:11,276][626795] Updated weights for policy 0, policy_version 188092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:13,031][626795] Updated weights for policy 0, policy_version 188102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:13,975][24592] Fps is (10 sec: 47514.1, 60 sec: 44646.5, 300 sec: 43098.3). Total num frames: 1540972544. Throughput: 0: 11048.1. Samples: 135217422. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:13,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:14,739][626795] Updated weights for policy 0, policy_version 188112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:16,566][626795] Updated weights for policy 0, policy_version 188122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:18,173][626795] Updated weights for policy 0, policy_version 188132 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:21,781][24592] Fps is (10 sec: 35186.2, 60 sec: 42521.7, 300 sec: 42609.8). Total num frames: 1541185536. Throughput: 0: 10384.8. Samples: 135288048. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:21,781][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:23,391][626795] Updated weights for policy 0, policy_version 188142 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:23,975][24592] Fps is (10 sec: 31129.4, 60 sec: 42325.3, 300 sec: 42543.8). Total num frames: 1541283840. Throughput: 0: 10140.4. Samples: 135319188. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:23,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:25,196][626795] Updated weights for policy 0, policy_version 188152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:27,190][626795] Updated weights for policy 0, policy_version 188162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:28,975][24592] Fps is (10 sec: 43268.0, 60 sec: 41915.7, 300 sec: 42487.3). Total num frames: 1541496832. Throughput: 0: 10603.8. Samples: 135351060. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:28,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:29,027][626795] Updated weights for policy 0, policy_version 188172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:30,958][626795] Updated weights for policy 0, policy_version 188182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:32,824][626795] Updated weights for policy 0, policy_version 188192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:33,976][24592] Fps is (10 sec: 43416.5, 60 sec: 41642.6, 300 sec: 42959.4). Total num frames: 1541718016. Throughput: 0: 10700.1. Samples: 135416604. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:33,977][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:34,591][626795] Updated weights for policy 0, policy_version 188202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:36,305][626795] Updated weights for policy 0, policy_version 188212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:38,106][626795] Updated weights for policy 0, policy_version 188222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:38,975][24592] Fps is (10 sec: 45874.9, 60 sec: 41642.6, 300 sec: 42959.4). Total num frames: 1541955584. Throughput: 0: 10706.4. Samples: 135486144. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:38,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:39,676][626795] Updated weights for policy 0, policy_version 188232 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:41,455][626795] Updated weights for policy 0, policy_version 188242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:43,252][626795] Updated weights for policy 0, policy_version 188252 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:43,975][24592] Fps is (10 sec: 46695.8, 60 sec: 43603.6, 300 sec: 42931.6). Total num frames: 1542184960. Throughput: 0: 10755.5. Samples: 135522642. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:43,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:44,962][626795] Updated weights for policy 0, policy_version 188262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:46,713][626795] Updated weights for policy 0, policy_version 188272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:48,336][626795] Updated weights for policy 0, policy_version 188282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:48,976][24592] Fps is (10 sec: 47512.5, 60 sec: 43963.7, 300 sec: 42987.2). Total num frames: 1542430720. Throughput: 0: 10719.0. Samples: 135592974. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:48,976][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:50,193][626795] Updated weights for policy 0, policy_version 188292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:51,839][626795] Updated weights for policy 0, policy_version 188302 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:53,624][626795] Updated weights for policy 0, policy_version 188312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:53,975][24592] Fps is (10 sec: 48332.0, 60 sec: 43963.6, 300 sec: 42987.1). Total num frames: 1542668288. Throughput: 0: 10709.4. Samples: 135664008. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:53,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:57,763][626795] Updated weights for policy 0, policy_version 188322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:58,975][24592] Fps is (10 sec: 36046.1, 60 sec: 42188.9, 300 sec: 42598.5). Total num frames: 1542791168. Throughput: 0: 10090.8. Samples: 135671508. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:51:58,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:51:59,470][626795] Updated weights for policy 0, policy_version 188332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:01,119][626795] Updated weights for policy 0, policy_version 188342 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:02,994][626795] Updated weights for policy 0, policy_version 188352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:03,975][24592] Fps is (10 sec: 36045.0, 60 sec: 42188.8, 300 sec: 42598.4). Total num frames: 1543028736. Throughput: 0: 10768.2. Samples: 135742410. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:03,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:04,639][626795] Updated weights for policy 0, policy_version 188362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:06,261][626795] Updated weights for policy 0, policy_version 188372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:08,045][626795] Updated weights for policy 0, policy_version 188382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:08,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42189.1, 300 sec: 43119.5). Total num frames: 1543266304. Throughput: 0: 10992.2. Samples: 135813834. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:08,976][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:09,800][626795] Updated weights for policy 0, policy_version 188392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:11,510][626795] Updated weights for policy 0, policy_version 188402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:13,243][626795] Updated weights for policy 0, policy_version 188412 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:13,976][24592] Fps is (10 sec: 47512.2, 60 sec: 42188.5, 300 sec: 43126.0). Total num frames: 1543503872. Throughput: 0: 11080.0. Samples: 135849666. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:13,978][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:14,857][626795] Updated weights for policy 0, policy_version 188422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:16,586][626795] Updated weights for policy 0, policy_version 188432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:18,435][626795] Updated weights for policy 0, policy_version 188442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:18,978][24592] Fps is (10 sec: 46682.0, 60 sec: 44542.6, 300 sec: 43125.6). Total num frames: 1543733248. Throughput: 0: 11202.5. Samples: 135920742. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:18,979][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:20,063][626795] Updated weights for policy 0, policy_version 188452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:21,785][626795] Updated weights for policy 0, policy_version 188462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:23,634][626795] Updated weights for policy 0, policy_version 188472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:23,975][24592] Fps is (10 sec: 47515.1, 60 sec: 44919.4, 300 sec: 43126.0). Total num frames: 1543979008. Throughput: 0: 11244.5. Samples: 135992148. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:23,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:25,328][626795] Updated weights for policy 0, policy_version 188482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:27,061][626795] Updated weights for policy 0, policy_version 188492 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:32,033][24592] Fps is (10 sec: 35138.7, 60 sec: 42741.1, 300 sec: 42628.6). Total num frames: 1544192000. Throughput: 0: 10496.1. Samples: 136027062. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:32,034][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:32,369][626795] Updated weights for policy 0, policy_version 188502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:33,975][24592] Fps is (10 sec: 30310.6, 60 sec: 42735.1, 300 sec: 42570.6). Total num frames: 1544282112. Throughput: 0: 10304.7. Samples: 136056684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:33,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:34,184][626795] Updated weights for policy 0, policy_version 188512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:36,098][626795] Updated weights for policy 0, policy_version 188522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:37,878][626795] Updated weights for policy 0, policy_version 188532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:38,975][24592] Fps is (10 sec: 43662.4, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1544495104. Throughput: 0: 10177.0. Samples: 136121970. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:38,978][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:39,845][626795] Updated weights for policy 0, policy_version 188542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:41,756][626795] Updated weights for policy 0, policy_version 188552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:43,436][626795] Updated weights for policy 0, policy_version 188562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:43,976][24592] Fps is (10 sec: 43415.3, 60 sec: 42188.4, 300 sec: 42944.2). Total num frames: 1544716288. Throughput: 0: 10734.5. Samples: 136154568. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:43,978][24592] Avg episode reward: [(0, '4.893')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:45,224][626795] Updated weights for policy 0, policy_version 188572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:47,019][626795] Updated weights for policy 0, policy_version 188582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:48,736][626795] Updated weights for policy 0, policy_version 188592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:48,975][24592] Fps is (10 sec: 45056.3, 60 sec: 41916.0, 300 sec: 42959.6). Total num frames: 1544945664. Throughput: 0: 10710.2. Samples: 136224366. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:48,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:50,437][626795] Updated weights for policy 0, policy_version 188602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:52,198][626795] Updated weights for policy 0, policy_version 188612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,482][626772] Signal inference workers to stop experience collection... (1900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,489][626772] Signal inference workers to resume experience collection... (1900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,500][626795] InferenceWorker_p0-w0: stopping experience collection (1900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,505][626795] InferenceWorker_p0-w0: resuming experience collection (1900 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:53,946][626795] Updated weights for policy 0, policy_version 188622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,975][24592] Fps is (10 sec: 47515.9, 60 sec: 42052.3, 300 sec: 42987.2). Total num frames: 1545191424. Throughput: 0: 10710.5. Samples: 136295808. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:53,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:55,540][626795] Updated weights for policy 0, policy_version 188632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:57,351][626795] Updated weights for policy 0, policy_version 188642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:58,975][24592] Fps is (10 sec: 48332.9, 60 sec: 43963.8, 300 sec: 42987.2). Total num frames: 1545428992. Throughput: 0: 10690.4. Samples: 136330728. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:52:58,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:52:59,177][626795] Updated weights for policy 0, policy_version 188652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:00,931][626795] Updated weights for policy 0, policy_version 188662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:02,464][626795] Updated weights for policy 0, policy_version 188672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:03,976][24592] Fps is (10 sec: 47509.9, 60 sec: 43963.2, 300 sec: 42987.2). Total num frames: 1545666560. Throughput: 0: 10700.7. Samples: 136402254. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:03,978][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000188680_1545666560.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:04,040][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000187413_1535287296.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:06,762][626795] Updated weights for policy 0, policy_version 188682 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:08,596][626795] Updated weights for policy 0, policy_version 188692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:08,975][24592] Fps is (10 sec: 35225.2, 60 sec: 41915.6, 300 sec: 42542.9). Total num frames: 1545781248. Throughput: 0: 10024.0. Samples: 136443228. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:08,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:10,228][626795] Updated weights for policy 0, policy_version 188702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:11,933][626795] Updated weights for policy 0, policy_version 188712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:13,735][626795] Updated weights for policy 0, policy_version 188722 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:13,976][24592] Fps is (10 sec: 35227.4, 60 sec: 41915.8, 300 sec: 42570.6). Total num frames: 1546018816. Throughput: 0: 10774.2. Samples: 136478958. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:13,977][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:15,521][626795] Updated weights for policy 0, policy_version 188732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:17,295][626795] Updated weights for policy 0, policy_version 188742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:18,941][626795] Updated weights for policy 0, policy_version 188752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:18,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42054.1, 300 sec: 43058.5). Total num frames: 1546256384. Throughput: 0: 10939.2. Samples: 136548948. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:18,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:20,694][626795] Updated weights for policy 0, policy_version 188762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:22,279][626795] Updated weights for policy 0, policy_version 188772 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:23,976][24592] Fps is (10 sec: 47513.7, 60 sec: 41915.6, 300 sec: 43098.2). Total num frames: 1546493952. Throughput: 0: 11092.2. Samples: 136621122. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:23,977][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:24,074][626795] Updated weights for policy 0, policy_version 188782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:25,808][626795] Updated weights for policy 0, policy_version 188792 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:27,558][626795] Updated weights for policy 0, policy_version 188802 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44598.4, 300 sec: 43098.3). Total num frames: 1546731520. Throughput: 0: 11161.9. Samples: 136656846. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:28,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:29,215][626795] Updated weights for policy 0, policy_version 188812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:30,991][626795] Updated weights for policy 0, policy_version 188822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:32,683][626795] Updated weights for policy 0, policy_version 188832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:33,975][24592] Fps is (10 sec: 47514.6, 60 sec: 44782.9, 300 sec: 43098.2). Total num frames: 1546969088. Throughput: 0: 11201.6. Samples: 136728438. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:33,978][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:34,390][626795] Updated weights for policy 0, policy_version 188842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:36,064][626795] Updated weights for policy 0, policy_version 188852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:37,822][626795] Updated weights for policy 0, policy_version 188862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:38,975][24592] Fps is (10 sec: 46694.0, 60 sec: 45056.0, 300 sec: 43070.6). Total num frames: 1547198464. Throughput: 0: 11177.9. Samples: 136798812. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:38,976][24592] Avg episode reward: [(0, '4.893')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:43,082][626795] Updated weights for policy 0, policy_version 188872 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:43,976][24592] Fps is (10 sec: 31128.9, 60 sec: 42735.1, 300 sec: 42542.8). Total num frames: 1547280384. Throughput: 0: 10489.9. Samples: 136802778. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:43,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:44,960][626795] Updated weights for policy 0, policy_version 188882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:46,799][626795] Updated weights for policy 0, policy_version 188892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:48,611][626795] Updated weights for policy 0, policy_version 188902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:48,975][24592] Fps is (10 sec: 30310.7, 60 sec: 42598.4, 300 sec: 42487.3). Total num frames: 1547501568. Throughput: 0: 10188.2. Samples: 136860714. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:48,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:50,321][626795] Updated weights for policy 0, policy_version 188912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:52,126][626795] Updated weights for policy 0, policy_version 188922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:53,753][626795] Updated weights for policy 0, policy_version 188932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:53,976][24592] Fps is (10 sec: 45873.9, 60 sec: 42461.5, 300 sec: 42981.1). Total num frames: 1547739136. Throughput: 0: 10852.8. Samples: 136931610. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:53,977][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:55,631][626795] Updated weights for policy 0, policy_version 188942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:57,200][626795] Updated weights for policy 0, policy_version 188952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:58,976][24592] Fps is (10 sec: 46692.3, 60 sec: 42325.0, 300 sec: 42987.1). Total num frames: 1547968512. Throughput: 0: 10848.1. Samples: 136967124. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:53:58,976][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:53:59,005][626795] Updated weights for policy 0, policy_version 188962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:00,638][626795] Updated weights for policy 0, policy_version 188972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:02,376][626795] Updated weights for policy 0, policy_version 188982 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:03,975][24592] Fps is (10 sec: 46697.5, 60 sec: 42326.0, 300 sec: 43042.7). Total num frames: 1548206080. Throughput: 0: 10885.5. Samples: 137038794. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:03,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:04,096][626795] Updated weights for policy 0, policy_version 188992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:05,869][626795] Updated weights for policy 0, policy_version 189002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:07,505][626795] Updated weights for policy 0, policy_version 189012 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:08,975][24592] Fps is (10 sec: 48334.9, 60 sec: 44509.9, 300 sec: 43070.5). Total num frames: 1548451840. Throughput: 0: 10881.3. Samples: 137110776. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:08,976][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:09,250][626795] Updated weights for policy 0, policy_version 189022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:10,877][626795] Updated weights for policy 0, policy_version 189032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:12,663][626795] Updated weights for policy 0, policy_version 189042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:13,975][24592] Fps is (10 sec: 48332.2, 60 sec: 44510.0, 300 sec: 43070.5). Total num frames: 1548689408. Throughput: 0: 10885.8. Samples: 137146710. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:13,979][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:14,453][626795] Updated weights for policy 0, policy_version 189052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:18,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42052.2, 300 sec: 42570.6). Total num frames: 1548779520. Throughput: 0: 10084.8. Samples: 137182254. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:18,977][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:19,186][626795] Updated weights for policy 0, policy_version 189062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:21,065][626795] Updated weights for policy 0, policy_version 189072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:22,749][626795] Updated weights for policy 0, policy_version 189082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:23,976][24592] Fps is (10 sec: 32765.9, 60 sec: 42052.0, 300 sec: 42570.6). Total num frames: 1549017088. Throughput: 0: 10043.7. Samples: 137250786. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:23,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:24,503][626795] Updated weights for policy 0, policy_version 189092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:26,295][626795] Updated weights for policy 0, policy_version 189102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:27,990][626795] Updated weights for policy 0, policy_version 189112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:28,976][24592] Fps is (10 sec: 47511.4, 60 sec: 42051.9, 300 sec: 43015.2). Total num frames: 1549254656. Throughput: 0: 10743.0. Samples: 137286216. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:28,977][24592] Avg episode reward: [(0, '4.956')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:29,675][626795] Updated weights for policy 0, policy_version 189122 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:31,346][626795] Updated weights for policy 0, policy_version 189132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:33,111][626795] Updated weights for policy 0, policy_version 189142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:33,976][24592] Fps is (10 sec: 47514.8, 60 sec: 42052.0, 300 sec: 43070.4). Total num frames: 1549492224. Throughput: 0: 11056.0. Samples: 137358240. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:33,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:34,837][626795] Updated weights for policy 0, policy_version 189152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:36,473][626795] Updated weights for policy 0, policy_version 189162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:38,251][626795] Updated weights for policy 0, policy_version 189172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:38,975][24592] Fps is (10 sec: 47515.9, 60 sec: 42188.9, 300 sec: 43070.5). Total num frames: 1549729792. Throughput: 0: 11075.1. Samples: 137429982. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:38,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:39,885][626795] Updated weights for policy 0, policy_version 189182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:41,651][626795] Updated weights for policy 0, policy_version 189192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:43,403][626795] Updated weights for policy 0, policy_version 189202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:43,976][24592] Fps is (10 sec: 47513.9, 60 sec: 44782.9, 300 sec: 43098.2). Total num frames: 1549967360. Throughput: 0: 11086.1. Samples: 137466000. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:43,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:45,033][626795] Updated weights for policy 0, policy_version 189212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:46,823][626795] Updated weights for policy 0, policy_version 189222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:48,579][626795] Updated weights for policy 0, policy_version 189232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:48,976][24592] Fps is (10 sec: 46691.8, 60 sec: 44919.1, 300 sec: 43070.4). Total num frames: 1550196736. Throughput: 0: 11055.9. Samples: 137536314. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:48,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:53,548][626795] Updated weights for policy 0, policy_version 189242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:53,976][24592] Fps is (10 sec: 31949.0, 60 sec: 42462.0, 300 sec: 42542.8). Total num frames: 1550286848. Throughput: 0: 10196.7. Samples: 137569632. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:53,977][24592] Avg episode reward: [(0, '5.125')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:55,370][626795] Updated weights for policy 0, policy_version 189252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:57,172][626795] Updated weights for policy 0, policy_version 189262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:54:58,828][626795] Updated weights for policy 0, policy_version 189272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:58,978][24592] Fps is (10 sec: 31941.6, 60 sec: 42460.2, 300 sec: 42542.5). Total num frames: 1550516224. Throughput: 0: 10156.5. Samples: 137603778. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:54:58,980][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:00,589][626795] Updated weights for policy 0, policy_version 189282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:02,196][626795] Updated weights for policy 0, policy_version 189292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:03,972][626795] Updated weights for policy 0, policy_version 189302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:03,975][24592] Fps is (10 sec: 47515.5, 60 sec: 42598.4, 300 sec: 43020.7). Total num frames: 1550761984. Throughput: 0: 10957.6. Samples: 137675346. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:03,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000189302_1550761984.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000188049_1540497408.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:05,745][626795] Updated weights for policy 0, policy_version 189312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:07,362][626795] Updated weights for policy 0, policy_version 189322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:08,975][24592] Fps is (10 sec: 48346.2, 60 sec: 42461.9, 300 sec: 43070.5). Total num frames: 1550999552. Throughput: 0: 11021.8. Samples: 137746758. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:08,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:09,117][626795] Updated weights for policy 0, policy_version 189332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:10,913][626795] Updated weights for policy 0, policy_version 189342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:12,577][626795] Updated weights for policy 0, policy_version 189352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:13,976][24592] Fps is (10 sec: 47512.1, 60 sec: 42461.7, 300 sec: 43126.0). Total num frames: 1551237120. Throughput: 0: 11022.7. Samples: 137782236. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:13,978][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:14,267][626795] Updated weights for policy 0, policy_version 189362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:16,063][626795] Updated weights for policy 0, policy_version 189372 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:17,746][626795] Updated weights for policy 0, policy_version 189382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:18,975][24592] Fps is (10 sec: 46694.7, 60 sec: 44783.0, 300 sec: 43126.0). Total num frames: 1551466496. Throughput: 0: 11008.8. Samples: 137853630. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:18,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:19,522][626795] Updated weights for policy 0, policy_version 189392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:21,242][626795] Updated weights for policy 0, policy_version 189402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:22,886][626795] Updated weights for policy 0, policy_version 189412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:23,976][24592] Fps is (10 sec: 46695.0, 60 sec: 44783.4, 300 sec: 43126.0). Total num frames: 1551704064. Throughput: 0: 10981.3. Samples: 137924142. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:23,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:24,769][626795] Updated weights for policy 0, policy_version 189422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:28,975][24592] Fps is (10 sec: 32767.8, 60 sec: 42325.6, 300 sec: 42626.2). Total num frames: 1551794176. Throughput: 0: 10532.6. Samples: 137939964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:28,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:29,791][626795] Updated weights for policy 0, policy_version 189432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:31,533][626795] Updated weights for policy 0, policy_version 189442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:33,274][626795] Updated weights for policy 0, policy_version 189452 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:33,975][24592] Fps is (10 sec: 31949.4, 60 sec: 42189.2, 300 sec: 42598.4). Total num frames: 1552023552. Throughput: 0: 10110.4. Samples: 137991276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:33,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:34,978][626795] Updated weights for policy 0, policy_version 189462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:36,796][626795] Updated weights for policy 0, policy_version 189472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:38,444][626795] Updated weights for policy 0, policy_version 189482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:38,975][24592] Fps is (10 sec: 45875.5, 60 sec: 42052.3, 300 sec: 42992.4). Total num frames: 1552252928. Throughput: 0: 10946.6. Samples: 138062226. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:38,976][24592] Avg episode reward: [(0, '4.931')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:40,181][626795] Updated weights for policy 0, policy_version 189492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:41,971][626795] Updated weights for policy 0, policy_version 189502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:43,554][626795] Updated weights for policy 0, policy_version 189512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:43,976][24592] Fps is (10 sec: 47512.0, 60 sec: 42188.9, 300 sec: 43070.5). Total num frames: 1552498688. Throughput: 0: 10982.3. Samples: 138097956. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:43,978][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:45,312][626795] Updated weights for policy 0, policy_version 189522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:47,040][626795] Updated weights for policy 0, policy_version 189532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:48,790][626795] Updated weights for policy 0, policy_version 189542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:48,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42325.7, 300 sec: 43070.5). Total num frames: 1552736256. Throughput: 0: 10981.3. Samples: 138169506. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:48,976][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:50,455][626795] Updated weights for policy 0, policy_version 189552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:52,327][626795] Updated weights for policy 0, policy_version 189562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:53,993][24592] Fps is (10 sec: 46611.9, 60 sec: 44633.3, 300 sec: 43067.9). Total num frames: 1552965632. Throughput: 0: 10941.0. Samples: 138239298. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:53,994][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:54,013][626795] Updated weights for policy 0, policy_version 189572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:55,721][626795] Updated weights for policy 0, policy_version 189582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:57,496][626795] Updated weights for policy 0, policy_version 189592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:58,976][24592] Fps is (10 sec: 46691.7, 60 sec: 44784.5, 300 sec: 43070.4). Total num frames: 1553203200. Throughput: 0: 10955.3. Samples: 138275226. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:55:58,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:55:59,303][626795] Updated weights for policy 0, policy_version 189602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:03,975][24592] Fps is (10 sec: 32006.5, 60 sec: 42052.3, 300 sec: 42542.9). Total num frames: 1553285120. Throughput: 0: 10306.7. Samples: 138317430. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:03,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:04,288][626795] Updated weights for policy 0, policy_version 189612 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:06,188][626795] Updated weights for policy 0, policy_version 189622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:07,942][626795] Updated weights for policy 0, policy_version 189632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:08,975][24592] Fps is (10 sec: 31131.6, 60 sec: 41915.7, 300 sec: 42515.1). Total num frames: 1553514496. Throughput: 0: 10037.2. Samples: 138375816. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:08,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:09,720][626795] Updated weights for policy 0, policy_version 189642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:11,374][626795] Updated weights for policy 0, policy_version 189652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:13,053][626795] Updated weights for policy 0, policy_version 189662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 41915.9, 300 sec: 43007.4). Total num frames: 1553752064. Throughput: 0: 10468.4. Samples: 138411042. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:13,977][24592] Avg episode reward: [(0, '4.945')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:14,807][626795] Updated weights for policy 0, policy_version 189672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:16,491][626795] Updated weights for policy 0, policy_version 189682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:18,328][626795] Updated weights for policy 0, policy_version 189692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:18,975][24592] Fps is (10 sec: 46694.2, 60 sec: 41915.7, 300 sec: 43042.7). Total num frames: 1553981440. Throughput: 0: 10928.5. Samples: 138483060. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:18,977][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:20,005][626795] Updated weights for policy 0, policy_version 189702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:21,673][626795] Updated weights for policy 0, policy_version 189712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:23,379][626795] Updated weights for policy 0, policy_version 189722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:23,975][24592] Fps is (10 sec: 46694.5, 60 sec: 41915.9, 300 sec: 43126.0). Total num frames: 1554219008. Throughput: 0: 10931.6. Samples: 138554148. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:23,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:25,175][626795] Updated weights for policy 0, policy_version 189732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:26,878][626795] Updated weights for policy 0, policy_version 189742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:28,578][626795] Updated weights for policy 0, policy_version 189752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:28,975][24592] Fps is (10 sec: 48333.2, 60 sec: 44509.9, 300 sec: 43209.4). Total num frames: 1554464768. Throughput: 0: 10924.0. Samples: 138589530. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:28,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:30,342][626795] Updated weights for policy 0, policy_version 189762 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:32,115][626795] Updated weights for policy 0, policy_version 189772 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:33,796][626795] Updated weights for policy 0, policy_version 189782 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:33,976][24592] Fps is (10 sec: 48331.7, 60 sec: 44646.2, 300 sec: 43209.3). Total num frames: 1554702336. Throughput: 0: 10921.2. Samples: 138660960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:33,981][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:35,597][626795] Updated weights for policy 0, policy_version 189792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:38,977][24592] Fps is (10 sec: 31123.9, 60 sec: 42051.0, 300 sec: 42681.4). Total num frames: 1554776064. Throughput: 0: 10162.1. Samples: 138696426. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:38,978][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:40,634][626795] Updated weights for policy 0, policy_version 189802 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:42,383][626795] Updated weights for policy 0, policy_version 189812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:43,975][24592] Fps is (10 sec: 31130.3, 60 sec: 41916.0, 300 sec: 42654.0). Total num frames: 1555013632. Throughput: 0: 10029.5. Samples: 138726546. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:43,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:44,225][626795] Updated weights for policy 0, policy_version 189822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:45,987][626795] Updated weights for policy 0, policy_version 189832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:47,598][626795] Updated weights for policy 0, policy_version 189842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:48,975][24592] Fps is (10 sec: 46702.8, 60 sec: 41779.2, 300 sec: 42626.2). Total num frames: 1555243008. Throughput: 0: 10659.7. Samples: 138797118. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:48,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:49,419][626795] Updated weights for policy 0, policy_version 189852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:51,093][626795] Updated weights for policy 0, policy_version 189862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:52,853][626795] Updated weights for policy 0, policy_version 189872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:53,976][24592] Fps is (10 sec: 46693.7, 60 sec: 41928.2, 300 sec: 43014.9). Total num frames: 1555480576. Throughput: 0: 10936.2. Samples: 138867948. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:53,977][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:54,622][626795] Updated weights for policy 0, policy_version 189882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:56,349][626795] Updated weights for policy 0, policy_version 189892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:58,025][626795] Updated weights for policy 0, policy_version 189902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:58,976][24592] Fps is (10 sec: 46693.1, 60 sec: 41779.4, 300 sec: 42987.1). Total num frames: 1555709952. Throughput: 0: 10937.4. Samples: 138903228. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:56:58,978][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:56:59,807][626795] Updated weights for policy 0, policy_version 189912 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:01,587][626795] Updated weights for policy 0, policy_version 189922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:03,209][626795] Updated weights for policy 0, policy_version 189932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:03,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44509.8, 300 sec: 43014.9). Total num frames: 1555955712. Throughput: 0: 10914.8. Samples: 138974226. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:03,976][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000189936_1555955712.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:04,038][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000188680_1545666560.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:05,038][626795] Updated weights for policy 0, policy_version 189942 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:06,747][626795] Updated weights for policy 0, policy_version 189952 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:08,489][626795] Updated weights for policy 0, policy_version 189962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:08,975][24592] Fps is (10 sec: 48334.2, 60 sec: 44646.4, 300 sec: 43015.0). Total num frames: 1556193280. Throughput: 0: 10915.7. Samples: 139045356. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:08,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:10,376][626795] Updated weights for policy 0, policy_version 189972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:14,216][24592] Fps is (10 sec: 31199.2, 60 sec: 41884.5, 300 sec: 42480.9). Total num frames: 1556275200. Throughput: 0: 10075.6. Samples: 139045356. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:14,218][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:15,346][626795] Updated weights for policy 0, policy_version 189982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:17,138][626795] Updated weights for policy 0, policy_version 189992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:18,975][24592] Fps is (10 sec: 29491.3, 60 sec: 41779.2, 300 sec: 42404.0). Total num frames: 1556488192. Throughput: 0: 9952.2. Samples: 139108806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:18,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:19,115][626795] Updated weights for policy 0, policy_version 190002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:20,883][626795] Updated weights for policy 0, policy_version 190012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:22,591][626795] Updated weights for policy 0, policy_version 190022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:23,976][24592] Fps is (10 sec: 46164.7, 60 sec: 41779.1, 300 sec: 42932.3). Total num frames: 1556725760. Throughput: 0: 10704.9. Samples: 139178130. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:23,978][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:24,332][626795] Updated weights for policy 0, policy_version 190032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:26,059][626795] Updated weights for policy 0, policy_version 190042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:27,795][626795] Updated weights for policy 0, policy_version 190052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:28,976][24592] Fps is (10 sec: 46692.9, 60 sec: 41505.9, 300 sec: 42959.4). Total num frames: 1556955136. Throughput: 0: 10816.9. Samples: 139213308. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:28,977][24592] Avg episode reward: [(0, '4.911')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:29,680][626795] Updated weights for policy 0, policy_version 190062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:31,292][626795] Updated weights for policy 0, policy_version 190072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:33,050][626795] Updated weights for policy 0, policy_version 190082 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:33,975][24592] Fps is (10 sec: 45876.0, 60 sec: 41369.7, 300 sec: 43014.9). Total num frames: 1557184512. Throughput: 0: 10804.1. Samples: 139283304. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:33,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:34,876][626795] Updated weights for policy 0, policy_version 190092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:36,636][626795] Updated weights for policy 0, policy_version 190102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:38,409][626795] Updated weights for policy 0, policy_version 190112 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:38,975][24592] Fps is (10 sec: 45876.8, 60 sec: 43965.1, 300 sec: 43042.8). Total num frames: 1557413888. Throughput: 0: 10767.4. Samples: 139352478. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:38,978][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:40,242][626795] Updated weights for policy 0, policy_version 190122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:41,980][626795] Updated weights for policy 0, policy_version 190132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:43,759][626795] Updated weights for policy 0, policy_version 190142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:43,976][24592] Fps is (10 sec: 46693.6, 60 sec: 43963.6, 300 sec: 43070.5). Total num frames: 1557651456. Throughput: 0: 10749.8. Samples: 139386966. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:43,976][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:45,564][626795] Updated weights for policy 0, policy_version 190152 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:48,975][24592] Fps is (10 sec: 35225.5, 60 sec: 42052.3, 300 sec: 42626.2). Total num frames: 1557766144. Throughput: 0: 10420.3. Samples: 139443138. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:48,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:49,735][626795] Updated weights for policy 0, policy_version 190162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:51,588][626795] Updated weights for policy 0, policy_version 190172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:53,254][626795] Updated weights for policy 0, policy_version 190182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:53,975][24592] Fps is (10 sec: 34407.0, 60 sec: 41915.8, 300 sec: 42598.4). Total num frames: 1557995520. Throughput: 0: 10051.9. Samples: 139497690. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:53,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:55,070][626795] Updated weights for policy 0, policy_version 190192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:56,711][626795] Updated weights for policy 0, policy_version 190202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:57:58,381][626795] Updated weights for policy 0, policy_version 190212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:58,975][24592] Fps is (10 sec: 47512.8, 60 sec: 42188.9, 300 sec: 42626.3). Total num frames: 1558241280. Throughput: 0: 10900.6. Samples: 139533264. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:57:58,977][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:00,168][626795] Updated weights for policy 0, policy_version 190222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:01,911][626795] Updated weights for policy 0, policy_version 190232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:03,565][626795] Updated weights for policy 0, policy_version 190242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:03,975][24592] Fps is (10 sec: 47513.8, 60 sec: 41915.8, 300 sec: 43015.0). Total num frames: 1558470656. Throughput: 0: 11016.7. Samples: 139604556. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:03,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:05,352][626795] Updated weights for policy 0, policy_version 190252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:07,104][626795] Updated weights for policy 0, policy_version 190262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,439][626772] Signal inference workers to stop experience collection... (1950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,440][626772] Signal inference workers to resume experience collection... (1950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,448][626795] InferenceWorker_p0-w0: stopping experience collection (1950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,450][626795] InferenceWorker_p0-w0: resuming experience collection (1950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:08,720][626795] Updated weights for policy 0, policy_version 190272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,976][24592] Fps is (10 sec: 46694.3, 60 sec: 41915.6, 300 sec: 43015.0). Total num frames: 1558708224. Throughput: 0: 11087.6. Samples: 139677072. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:08,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:10,460][626795] Updated weights for policy 0, policy_version 190282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:12,198][626795] Updated weights for policy 0, policy_version 190292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:13,905][626795] Updated weights for policy 0, policy_version 190302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:13,975][24592] Fps is (10 sec: 48332.6, 60 sec: 44825.9, 300 sec: 43042.7). Total num frames: 1558953984. Throughput: 0: 11086.7. Samples: 139712208. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:13,978][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:15,519][626795] Updated weights for policy 0, policy_version 190312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:17,338][626795] Updated weights for policy 0, policy_version 190322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:18,976][24592] Fps is (10 sec: 48330.0, 60 sec: 45055.4, 300 sec: 43042.6). Total num frames: 1559191552. Throughput: 0: 11130.7. Samples: 139784196. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:18,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:19,055][626795] Updated weights for policy 0, policy_version 190332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:20,744][626795] Updated weights for policy 0, policy_version 190342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:24,465][24592] Fps is (10 sec: 32018.7, 60 sec: 42389.0, 300 sec: 42500.1). Total num frames: 1559289856. Throughput: 0: 10274.9. Samples: 139819884. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:24,467][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:25,822][626795] Updated weights for policy 0, policy_version 190352 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:27,537][626795] Updated weights for policy 0, policy_version 190362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:28,975][24592] Fps is (10 sec: 31951.3, 60 sec: 42598.6, 300 sec: 42515.1). Total num frames: 1559511040. Throughput: 0: 10312.6. Samples: 139851030. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:28,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:29,321][626795] Updated weights for policy 0, policy_version 190372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:31,066][626795] Updated weights for policy 0, policy_version 190382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:32,755][626795] Updated weights for policy 0, policy_version 190392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:33,976][24592] Fps is (10 sec: 48236.1, 60 sec: 42734.6, 300 sec: 42542.8). Total num frames: 1559748608. Throughput: 0: 10652.0. Samples: 139922484. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:33,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:34,482][626795] Updated weights for policy 0, policy_version 190402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:36,135][626795] Updated weights for policy 0, policy_version 190412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:37,874][626795] Updated weights for policy 0, policy_version 190422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:38,976][24592] Fps is (10 sec: 47512.3, 60 sec: 42871.3, 300 sec: 43070.5). Total num frames: 1559986176. Throughput: 0: 11035.8. Samples: 139994304. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:38,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:39,600][626795] Updated weights for policy 0, policy_version 190432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:41,278][626795] Updated weights for policy 0, policy_version 190442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:42,989][626795] Updated weights for policy 0, policy_version 190452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:43,975][24592] Fps is (10 sec: 47515.4, 60 sec: 42871.6, 300 sec: 43126.0). Total num frames: 1560223744. Throughput: 0: 11040.2. Samples: 140030070. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:43,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:44,770][626795] Updated weights for policy 0, policy_version 190462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:46,325][626795] Updated weights for policy 0, policy_version 190472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:48,145][626795] Updated weights for policy 0, policy_version 190482 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:48,975][24592] Fps is (10 sec: 47514.7, 60 sec: 44919.4, 300 sec: 43126.1). Total num frames: 1560461312. Throughput: 0: 11058.0. Samples: 140102166. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:48,978][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:49,929][626795] Updated weights for policy 0, policy_version 190492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:51,508][626795] Updated weights for policy 0, policy_version 190502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:53,339][626795] Updated weights for policy 0, policy_version 190512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:53,975][24592] Fps is (10 sec: 47514.1, 60 sec: 45056.0, 300 sec: 43153.9). Total num frames: 1560698880. Throughput: 0: 11027.0. Samples: 140173284. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:58:53,976][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:58:55,042][626795] Updated weights for policy 0, policy_version 190522 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:00,299][24592] Fps is (10 sec: 32553.8, 60 sec: 42213.1, 300 sec: 42601.6). Total num frames: 1560829952. Throughput: 0: 10715.1. Samples: 140208576. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:00,301][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:00,397][626795] Updated weights for policy 0, policy_version 190532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:02,081][626795] Updated weights for policy 0, policy_version 190542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:03,710][626795] Updated weights for policy 0, policy_version 190552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:03,975][24592] Fps is (10 sec: 31129.3, 60 sec: 42325.3, 300 sec: 42570.6). Total num frames: 1561010176. Throughput: 0: 10074.0. Samples: 140237520. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:03,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000190553_1561010176.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:04,043][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000189302_1550761984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:05,488][626795] Updated weights for policy 0, policy_version 190562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:07,339][626795] Updated weights for policy 0, policy_version 190572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:08,955][626795] Updated weights for policy 0, policy_version 190582 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:08,975][24592] Fps is (10 sec: 48155.5, 60 sec: 42325.5, 300 sec: 42570.7). Total num frames: 1561247744. Throughput: 0: 10986.8. Samples: 140308908. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:08,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:10,641][626795] Updated weights for policy 0, policy_version 190592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:12,328][626795] Updated weights for policy 0, policy_version 190602 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:13,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42188.8, 300 sec: 43070.5). Total num frames: 1561485312. Throughput: 0: 10969.1. Samples: 140344638. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:13,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:14,123][626795] Updated weights for policy 0, policy_version 190612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:15,845][626795] Updated weights for policy 0, policy_version 190622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:17,547][626795] Updated weights for policy 0, policy_version 190632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:18,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42189.3, 300 sec: 43070.6). Total num frames: 1561722880. Throughput: 0: 10985.6. Samples: 140416830. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:18,978][24592] Avg episode reward: [(0, '4.358')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:19,228][626795] Updated weights for policy 0, policy_version 190642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:20,954][626795] Updated weights for policy 0, policy_version 190652 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:22,763][626795] Updated weights for policy 0, policy_version 190662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:23,976][24592] Fps is (10 sec: 47511.8, 60 sec: 44876.0, 300 sec: 43070.5). Total num frames: 1561960448. Throughput: 0: 10977.0. Samples: 140488272. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:23,979][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:24,352][626795] Updated weights for policy 0, policy_version 190672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:26,075][626795] Updated weights for policy 0, policy_version 190682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:27,883][626795] Updated weights for policy 0, policy_version 190692 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:28,975][24592] Fps is (10 sec: 47514.0, 60 sec: 44783.0, 300 sec: 43070.6). Total num frames: 1562198016. Throughput: 0: 10971.1. Samples: 140523768. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:28,976][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:29,576][626795] Updated weights for policy 0, policy_version 190702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:31,260][626795] Updated weights for policy 0, policy_version 190712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:36,131][24592] Fps is (10 sec: 33698.2, 60 sec: 42176.0, 300 sec: 42537.5). Total num frames: 1562370048. Throughput: 0: 9694.5. Samples: 140559312. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:36,131][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:36,607][626795] Updated weights for policy 0, policy_version 190722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:38,238][626795] Updated weights for policy 0, policy_version 190732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:38,975][24592] Fps is (10 sec: 31129.2, 60 sec: 42052.4, 300 sec: 42515.1). Total num frames: 1562509312. Throughput: 0: 10019.4. Samples: 140624160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:38,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:40,013][626795] Updated weights for policy 0, policy_version 190742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:41,794][626795] Updated weights for policy 0, policy_version 190752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:43,460][626795] Updated weights for policy 0, policy_version 190762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:43,975][24592] Fps is (10 sec: 48036.7, 60 sec: 42052.3, 300 sec: 42542.9). Total num frames: 1562746880. Throughput: 0: 10321.6. Samples: 140659380. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:43,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:45,175][626795] Updated weights for policy 0, policy_version 190772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:46,987][626795] Updated weights for policy 0, policy_version 190782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:48,649][626795] Updated weights for policy 0, policy_version 190792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:48,978][24592] Fps is (10 sec: 46684.7, 60 sec: 41914.2, 300 sec: 43014.7). Total num frames: 1562976256. Throughput: 0: 10963.1. Samples: 140730882. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:48,979][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:50,343][626795] Updated weights for policy 0, policy_version 190802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:52,027][626795] Updated weights for policy 0, policy_version 190812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:53,718][626795] Updated weights for policy 0, policy_version 190822 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:53,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42052.2, 300 sec: 43070.9). Total num frames: 1563222016. Throughput: 0: 10979.8. Samples: 140803002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:53,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:55,510][626795] Updated weights for policy 0, policy_version 190832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:57,196][626795] Updated weights for policy 0, policy_version 190842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 11:59:58,831][626795] Updated weights for policy 0, policy_version 190852 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:58,975][24592] Fps is (10 sec: 49162.5, 60 sec: 44955.8, 300 sec: 43070.5). Total num frames: 1563467776. Throughput: 0: 10978.7. Samples: 140838678. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 11:59:58,977][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:00,604][626795] Updated weights for policy 0, policy_version 190862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:02,341][626795] Updated weights for policy 0, policy_version 190872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:03,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44783.0, 300 sec: 43042.7). Total num frames: 1563697152. Throughput: 0: 10972.8. Samples: 140910606. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:03,976][24592] Avg episode reward: [(0, '4.878')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:04,031][626795] Updated weights for policy 0, policy_version 190882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:05,801][626795] Updated weights for policy 0, policy_version 190892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:07,504][626795] Updated weights for policy 0, policy_version 190902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:11,968][24592] Fps is (10 sec: 34048.5, 60 sec: 42265.4, 300 sec: 42528.1). Total num frames: 1563910144. Throughput: 0: 9543.8. Samples: 140946300. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:11,973][24592] Avg episode reward: [(0, '4.916')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:12,761][626795] Updated weights for policy 0, policy_version 190912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:13,975][24592] Fps is (10 sec: 31129.8, 60 sec: 42052.3, 300 sec: 42515.1). Total num frames: 1564008448. Throughput: 0: 10116.3. Samples: 140979000. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:13,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:14,520][626795] Updated weights for policy 0, policy_version 190922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:16,272][626795] Updated weights for policy 0, policy_version 190932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:17,883][626795] Updated weights for policy 0, policy_version 190942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:18,976][24592] Fps is (10 sec: 46757.7, 60 sec: 41915.5, 300 sec: 42487.3). Total num frames: 1564237824. Throughput: 0: 11370.2. Samples: 141046470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:18,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:19,716][626795] Updated weights for policy 0, policy_version 190952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:21,396][626795] Updated weights for policy 0, policy_version 190962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:23,117][626795] Updated weights for policy 0, policy_version 190972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:23,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42052.5, 300 sec: 43014.9). Total num frames: 1564483584. Throughput: 0: 10975.6. Samples: 141118062. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:23,976][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:24,808][626795] Updated weights for policy 0, policy_version 190982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:26,514][626795] Updated weights for policy 0, policy_version 190992 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:28,273][626795] Updated weights for policy 0, policy_version 191002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:28,975][24592] Fps is (10 sec: 48334.0, 60 sec: 42052.2, 300 sec: 43042.7). Total num frames: 1564721152. Throughput: 0: 10986.1. Samples: 141153756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:28,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:29,986][626795] Updated weights for policy 0, policy_version 191012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:31,706][626795] Updated weights for policy 0, policy_version 191022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:33,401][626795] Updated weights for policy 0, policy_version 191032 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:33,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44610.5, 300 sec: 43042.7). Total num frames: 1564950528. Throughput: 0: 10977.5. Samples: 141224844. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:33,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:35,055][626795] Updated weights for policy 0, policy_version 191042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:36,851][626795] Updated weights for policy 0, policy_version 191052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:38,586][626795] Updated weights for policy 0, policy_version 191062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:38,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44782.9, 300 sec: 43042.8). Total num frames: 1565196288. Throughput: 0: 10971.7. Samples: 141296730. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:38,977][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:40,228][626795] Updated weights for policy 0, policy_version 191072 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:42,017][626795] Updated weights for policy 0, policy_version 191082 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:43,661][626795] Updated weights for policy 0, policy_version 191092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:43,975][24592] Fps is (10 sec: 48332.5, 60 sec: 44782.9, 300 sec: 43042.7). Total num frames: 1565433856. Throughput: 0: 10976.4. Samples: 141332616. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:43,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:48,975][24592] Fps is (10 sec: 30310.4, 60 sec: 42053.7, 300 sec: 42489.9). Total num frames: 1565499392. Throughput: 0: 10078.4. Samples: 141364134. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:48,978][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:48,986][626795] Updated weights for policy 0, policy_version 191102 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:50,812][626795] Updated weights for policy 0, policy_version 191112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:52,475][626795] Updated weights for policy 0, policy_version 191122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:53,975][24592] Fps is (10 sec: 29491.5, 60 sec: 41779.2, 300 sec: 42459.7). Total num frames: 1565728768. Throughput: 0: 11556.6. Samples: 141431766. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:53,977][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:54,160][626795] Updated weights for policy 0, policy_version 191132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:56,032][626795] Updated weights for policy 0, policy_version 191142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:57,712][626795] Updated weights for policy 0, policy_version 191152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:58,976][24592] Fps is (10 sec: 47512.3, 60 sec: 41779.0, 300 sec: 43014.9). Total num frames: 1565974528. Throughput: 0: 10855.1. Samples: 141467484. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:00:58,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:00:59,492][626795] Updated weights for policy 0, policy_version 191162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:01,137][626795] Updated weights for policy 0, policy_version 191172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:02,759][626795] Updated weights for policy 0, policy_version 191182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:03,975][24592] Fps is (10 sec: 48331.9, 60 sec: 41915.7, 300 sec: 43042.7). Total num frames: 1566212096. Throughput: 0: 10949.8. Samples: 141539208. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:03,980][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:04,001][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000191189_1566220288.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000189936_1555955712.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:04,611][626795] Updated weights for policy 0, policy_version 191192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:06,344][626795] Updated weights for policy 0, policy_version 191202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:07,946][626795] Updated weights for policy 0, policy_version 191212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:08,976][24592] Fps is (10 sec: 47510.3, 60 sec: 44546.2, 300 sec: 43042.6). Total num frames: 1566449664. Throughput: 0: 10949.0. Samples: 141610776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:08,977][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:09,655][626795] Updated weights for policy 0, policy_version 191222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:11,416][626795] Updated weights for policy 0, policy_version 191232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:13,172][626795] Updated weights for policy 0, policy_version 191242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:13,975][24592] Fps is (10 sec: 47514.0, 60 sec: 44646.3, 300 sec: 43070.5). Total num frames: 1566687232. Throughput: 0: 10951.6. Samples: 141646578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:13,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:14,822][626795] Updated weights for policy 0, policy_version 191252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:16,591][626795] Updated weights for policy 0, policy_version 191262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:18,286][626795] Updated weights for policy 0, policy_version 191272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:18,975][24592] Fps is (10 sec: 47518.8, 60 sec: 44783.2, 300 sec: 43070.5). Total num frames: 1566924800. Throughput: 0: 10958.3. Samples: 141717966. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:18,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:20,014][626795] Updated weights for policy 0, policy_version 191282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:23,975][24592] Fps is (10 sec: 31129.9, 60 sec: 41915.8, 300 sec: 42487.3). Total num frames: 1566998528. Throughput: 0: 10006.2. Samples: 141747006. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:23,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:25,344][626795] Updated weights for policy 0, policy_version 191292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:26,992][626795] Updated weights for policy 0, policy_version 191302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:28,728][626795] Updated weights for policy 0, policy_version 191312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:28,976][24592] Fps is (10 sec: 31128.4, 60 sec: 41915.6, 300 sec: 42487.3). Total num frames: 1567236096. Throughput: 0: 10013.4. Samples: 141783222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:28,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:30,480][626795] Updated weights for policy 0, policy_version 191322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:32,115][626795] Updated weights for policy 0, policy_version 191332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:33,892][626795] Updated weights for policy 0, policy_version 191342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:33,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42052.3, 300 sec: 43043.0). Total num frames: 1567473664. Throughput: 0: 10893.4. Samples: 141854334. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:33,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:35,601][626795] Updated weights for policy 0, policy_version 191352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:37,289][626795] Updated weights for policy 0, policy_version 191362 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:38,976][24592] Fps is (10 sec: 47514.5, 60 sec: 41915.7, 300 sec: 43042.7). Total num frames: 1567711232. Throughput: 0: 10973.6. Samples: 141925578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:38,977][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:39,059][626795] Updated weights for policy 0, policy_version 191372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:40,774][626795] Updated weights for policy 0, policy_version 191382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:42,426][626795] Updated weights for policy 0, policy_version 191392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 41915.8, 300 sec: 43070.5). Total num frames: 1567948800. Throughput: 0: 10977.0. Samples: 141961446. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:43,977][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:44,223][626795] Updated weights for policy 0, policy_version 191402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:45,903][626795] Updated weights for policy 0, policy_version 191412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:47,758][626795] Updated weights for policy 0, policy_version 191422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:48,975][24592] Fps is (10 sec: 47514.3, 60 sec: 44783.0, 300 sec: 43070.5). Total num frames: 1568186368. Throughput: 0: 10962.2. Samples: 142032504. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:48,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:49,266][626795] Updated weights for policy 0, policy_version 191432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:51,012][626795] Updated weights for policy 0, policy_version 191442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:52,810][626795] Updated weights for policy 0, policy_version 191452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:53,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44919.5, 300 sec: 43098.3). Total num frames: 1568423936. Throughput: 0: 10967.2. Samples: 142104288. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:53,977][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:54,568][626795] Updated weights for policy 0, policy_version 191462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:59,325][24592] Fps is (10 sec: 31659.9, 60 sec: 42080.1, 300 sec: 42520.2). Total num frames: 1568514048. Throughput: 0: 10092.8. Samples: 142104288. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:01:59,326][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:01:59,858][626795] Updated weights for policy 0, policy_version 191472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:01,548][626795] Updated weights for policy 0, policy_version 191482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:03,091][626795] Updated weights for policy 0, policy_version 191492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:03,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42052.4, 300 sec: 42515.1). Total num frames: 1568735232. Throughput: 0: 10037.1. Samples: 142169634. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:03,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:04,991][626795] Updated weights for policy 0, policy_version 191502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:06,710][626795] Updated weights for policy 0, policy_version 191512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:08,398][626795] Updated weights for policy 0, policy_version 191522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:08,976][24592] Fps is (10 sec: 47537.2, 60 sec: 42052.7, 300 sec: 43077.8). Total num frames: 1568972800. Throughput: 0: 10953.2. Samples: 142239906. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:08,978][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:10,179][626795] Updated weights for policy 0, policy_version 191532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:11,871][626795] Updated weights for policy 0, policy_version 191542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:13,550][626795] Updated weights for policy 0, policy_version 191552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42052.3, 300 sec: 43126.0). Total num frames: 1569210368. Throughput: 0: 10948.6. Samples: 142275906. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:13,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:15,309][626795] Updated weights for policy 0, policy_version 191562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:17,030][626795] Updated weights for policy 0, policy_version 191572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:18,750][626795] Updated weights for policy 0, policy_version 191582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:18,975][24592] Fps is (10 sec: 47515.1, 60 sec: 42052.2, 300 sec: 43126.0). Total num frames: 1569447936. Throughput: 0: 10957.3. Samples: 142347414. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:18,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:20,530][626795] Updated weights for policy 0, policy_version 191592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:22,331][626795] Updated weights for policy 0, policy_version 191602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:23,918][626795] Updated weights for policy 0, policy_version 191612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:23,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44782.9, 300 sec: 43153.8). Total num frames: 1569685504. Throughput: 0: 10952.2. Samples: 142418424. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:23,976][24592] Avg episode reward: [(0, '5.107')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:25,598][626795] Updated weights for policy 0, policy_version 191622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:27,345][626795] Updated weights for policy 0, policy_version 191632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:28,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44783.2, 300 sec: 43181.6). Total num frames: 1569923072. Throughput: 0: 10953.5. Samples: 142454352. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:28,977][24592] Avg episode reward: [(0, '4.998')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:29,040][626795] Updated weights for policy 0, policy_version 191642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:30,842][626795] Updated weights for policy 0, policy_version 191652 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:35,077][24592] Fps is (10 sec: 31730.3, 60 sec: 41964.5, 300 sec: 42633.6). Total num frames: 1570037760. Throughput: 0: 9930.9. Samples: 142490334. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:35,078][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:35,985][626795] Updated weights for policy 0, policy_version 191662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:37,748][626795] Updated weights for policy 0, policy_version 191672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:38,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42052.4, 300 sec: 42654.0). Total num frames: 1570234368. Throughput: 0: 10030.4. Samples: 142555656. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:38,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:39,462][626795] Updated weights for policy 0, policy_version 191682 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:41,197][626795] Updated weights for policy 0, policy_version 191692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:42,876][626795] Updated weights for policy 0, policy_version 191702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:43,975][24592] Fps is (10 sec: 48792.3, 60 sec: 42052.3, 300 sec: 43070.5). Total num frames: 1570471936. Throughput: 0: 10908.8. Samples: 142591368. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:43,977][24592] Avg episode reward: [(0, '4.989')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:44,590][626795] Updated weights for policy 0, policy_version 191712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:46,388][626795] Updated weights for policy 0, policy_version 191722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:48,067][626795] Updated weights for policy 0, policy_version 191732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:48,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42052.2, 300 sec: 43098.2). Total num frames: 1570709504. Throughput: 0: 10961.8. Samples: 142662918. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:48,977][24592] Avg episode reward: [(0, '5.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:48,979][626772] Saving new best policy, reward=5.371!\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:49,916][626795] Updated weights for policy 0, policy_version 191742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:51,540][626795] Updated weights for policy 0, policy_version 191752 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:53,243][626795] Updated weights for policy 0, policy_version 191762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:53,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42052.2, 300 sec: 43070.5). Total num frames: 1570947072. Throughput: 0: 10973.5. Samples: 142733712. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:53,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:55,070][626795] Updated weights for policy 0, policy_version 191772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:56,687][626795] Updated weights for policy 0, policy_version 191782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:02:58,484][626795] Updated weights for policy 0, policy_version 191792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:58,976][24592] Fps is (10 sec: 47511.2, 60 sec: 44770.6, 300 sec: 43098.2). Total num frames: 1571184640. Throughput: 0: 10969.5. Samples: 142769538. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:02:58,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:00,193][626795] Updated weights for policy 0, policy_version 191802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:01,895][626795] Updated weights for policy 0, policy_version 191812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:03,569][626795] Updated weights for policy 0, policy_version 191822 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:03,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44646.3, 300 sec: 43070.5). Total num frames: 1571414016. Throughput: 0: 10962.1. Samples: 142840710. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:03,976][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:03,989][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000191824_1571422208.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:04,099][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000190553_1561010176.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:05,416][626795] Updated weights for policy 0, policy_version 191832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:07,044][626795] Updated weights for policy 0, policy_version 191842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:10,823][24592] Fps is (10 sec: 33190.6, 60 sec: 42120.7, 300 sec: 42526.4). Total num frames: 1571577856. Throughput: 0: 9760.2. Samples: 142875666. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:10,824][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:12,274][626795] Updated weights for policy 0, policy_version 191852 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:13,975][24592] Fps is (10 sec: 31129.8, 60 sec: 41915.7, 300 sec: 42487.4). Total num frames: 1571725312. Throughput: 0: 10027.3. Samples: 142905582. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:13,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:14,037][626795] Updated weights for policy 0, policy_version 191862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:15,787][626795] Updated weights for policy 0, policy_version 191872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:17,579][626795] Updated weights for policy 0, policy_version 191882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:18,975][24592] Fps is (10 sec: 47229.6, 60 sec: 41915.8, 300 sec: 43030.9). Total num frames: 1571962880. Throughput: 0: 11071.7. Samples: 142976364. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:18,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:19,214][626795] Updated weights for policy 0, policy_version 191892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:21,082][626795] Updated weights for policy 0, policy_version 191902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:22,653][626795] Updated weights for policy 0, policy_version 191912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:23,975][24592] Fps is (10 sec: 47513.1, 60 sec: 41915.6, 300 sec: 43014.9). Total num frames: 1572200448. Throughput: 0: 10941.7. Samples: 143048034. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:23,976][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:24,513][626795] Updated weights for policy 0, policy_version 191922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:26,100][626795] Updated weights for policy 0, policy_version 191932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:27,871][626795] Updated weights for policy 0, policy_version 191942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:28,975][24592] Fps is (10 sec: 47513.4, 60 sec: 41915.7, 300 sec: 43015.0). Total num frames: 1572438016. Throughput: 0: 10922.7. Samples: 143082888. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:28,976][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:29,497][626795] Updated weights for policy 0, policy_version 191952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:31,283][626795] Updated weights for policy 0, policy_version 191962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:32,915][626795] Updated weights for policy 0, policy_version 191972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:33,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44785.9, 300 sec: 43015.0). Total num frames: 1572675584. Throughput: 0: 10949.6. Samples: 143155650. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:33,976][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:34,702][626795] Updated weights for policy 0, policy_version 191982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:36,436][626795] Updated weights for policy 0, policy_version 191992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:38,105][626795] Updated weights for policy 0, policy_version 192002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:38,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44646.4, 300 sec: 43014.9). Total num frames: 1572913152. Throughput: 0: 10959.9. Samples: 143226906. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:38,977][24592] Avg episode reward: [(0, '4.943')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:39,890][626795] Updated weights for policy 0, policy_version 192012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:41,667][626795] Updated weights for policy 0, policy_version 192022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:46,564][24592] Fps is (10 sec: 34490.1, 60 sec: 42145.5, 300 sec: 42503.2). Total num frames: 1573109760. Throughput: 0: 10349.5. Samples: 143262048. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:46,567][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:46,758][626795] Updated weights for policy 0, policy_version 192032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:48,598][626795] Updated weights for policy 0, policy_version 192042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:48,975][24592] Fps is (10 sec: 31129.8, 60 sec: 41915.8, 300 sec: 42459.6). Total num frames: 1573224448. Throughput: 0: 10034.7. Samples: 143292270. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:48,976][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:50,335][626795] Updated weights for policy 0, policy_version 192052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:51,954][626795] Updated weights for policy 0, policy_version 192062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:53,745][626795] Updated weights for policy 0, policy_version 192072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:53,976][24592] Fps is (10 sec: 47526.2, 60 sec: 41915.6, 300 sec: 43013.6). Total num frames: 1573462016. Throughput: 0: 11298.4. Samples: 143363220. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:53,977][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:55,446][626795] Updated weights for policy 0, policy_version 192082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:57,203][626795] Updated weights for policy 0, policy_version 192092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:03:58,961][626795] Updated weights for policy 0, policy_version 192102 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:58,975][24592] Fps is (10 sec: 47513.0, 60 sec: 41916.1, 300 sec: 43014.9). Total num frames: 1573699584. Throughput: 0: 10959.4. Samples: 143398758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:03:58,976][24592] Avg episode reward: [(0, '4.329')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:00,532][626795] Updated weights for policy 0, policy_version 192112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:02,333][626795] Updated weights for policy 0, policy_version 192122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:03,977][24592] Fps is (10 sec: 47508.0, 60 sec: 42051.3, 300 sec: 43014.7). Total num frames: 1573937152. Throughput: 0: 10971.6. Samples: 143470104. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:03,979][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:04,083][626795] Updated weights for policy 0, policy_version 192132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:05,821][626795] Updated weights for policy 0, policy_version 192142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:07,474][626795] Updated weights for policy 0, policy_version 192152 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:08,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44656.2, 300 sec: 43014.9). Total num frames: 1574174720. Throughput: 0: 10962.5. Samples: 143541348. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:08,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:09,164][626795] Updated weights for policy 0, policy_version 192162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:10,938][626795] Updated weights for policy 0, policy_version 192172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:12,675][626795] Updated weights for policy 0, policy_version 192182 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:13,975][24592] Fps is (10 sec: 47520.5, 60 sec: 44782.9, 300 sec: 43014.9). Total num frames: 1574412288. Throughput: 0: 10978.4. Samples: 143576916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:13,977][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:14,369][626795] Updated weights for policy 0, policy_version 192192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:16,159][626795] Updated weights for policy 0, policy_version 192202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:17,924][626795] Updated weights for policy 0, policy_version 192212 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:22,305][24592] Fps is (10 sec: 35029.7, 60 sec: 42298.7, 300 sec: 42507.4). Total num frames: 1574641664. Throughput: 0: 10191.5. Samples: 143648208. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:22,309][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:23,082][626795] Updated weights for policy 0, policy_version 192222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:23,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42052.3, 300 sec: 42459.5). Total num frames: 1574723584. Throughput: 0: 10023.2. Samples: 143677950. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:23,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:24,908][626795] Updated weights for policy 0, policy_version 192232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:26,576][626795] Updated weights for policy 0, policy_version 192242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:28,276][626795] Updated weights for policy 0, policy_version 192252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:28,975][24592] Fps is (10 sec: 47900.0, 60 sec: 42052.3, 300 sec: 42995.8). Total num frames: 1574961152. Throughput: 0: 10634.4. Samples: 143713068. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:28,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:30,105][626795] Updated weights for policy 0, policy_version 192262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:31,733][626795] Updated weights for policy 0, policy_version 192272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:33,452][626795] Updated weights for policy 0, policy_version 192282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,703][626772] Signal inference workers to stop experience collection... (2000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,704][626772] Signal inference workers to resume experience collection... (2000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,719][626795] InferenceWorker_p0-w0: stopping experience collection (2000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,719][626795] InferenceWorker_p0-w0: resuming experience collection (2000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,976][24592] Fps is (10 sec: 47512.8, 60 sec: 42052.1, 300 sec: 43014.9). Total num frames: 1575198720. Throughput: 0: 10937.1. Samples: 143784444. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:33,977][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:35,230][626795] Updated weights for policy 0, policy_version 192292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:36,982][626795] Updated weights for policy 0, policy_version 192302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:38,608][626795] Updated weights for policy 0, policy_version 192312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:38,985][24592] Fps is (10 sec: 46649.5, 60 sec: 41909.0, 300 sec: 42985.8). Total num frames: 1575428096. Throughput: 0: 10928.3. Samples: 143855094. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:38,986][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:40,426][626795] Updated weights for policy 0, policy_version 192322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:42,089][626795] Updated weights for policy 0, policy_version 192332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:43,787][626795] Updated weights for policy 0, policy_version 192342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:43,975][24592] Fps is (10 sec: 46695.1, 60 sec: 44519.0, 300 sec: 43015.3). Total num frames: 1575665664. Throughput: 0: 10940.7. Samples: 143891088. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:43,976][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:45,624][626795] Updated weights for policy 0, policy_version 192352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:47,291][626795] Updated weights for policy 0, policy_version 192362 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:48,928][626795] Updated weights for policy 0, policy_version 192372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:48,975][24592] Fps is (10 sec: 48379.3, 60 sec: 44782.9, 300 sec: 43014.9). Total num frames: 1575911424. Throughput: 0: 10939.3. Samples: 143962356. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:48,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:50,752][626795] Updated weights for policy 0, policy_version 192382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:52,397][626795] Updated weights for policy 0, policy_version 192392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:53,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44646.6, 300 sec: 42959.4). Total num frames: 1576140800. Throughput: 0: 10950.6. Samples: 144034122. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:53,977][24592] Avg episode reward: [(0, '4.955')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:54,099][626795] Updated weights for policy 0, policy_version 192402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:58,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42052.3, 300 sec: 42459.6). Total num frames: 1576222720. Throughput: 0: 10410.7. Samples: 144045396. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:04:58,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:04:59,366][626795] Updated weights for policy 0, policy_version 192412 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:01,155][626795] Updated weights for policy 0, policy_version 192422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:02,778][626795] Updated weights for policy 0, policy_version 192432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:03,976][24592] Fps is (10 sec: 31129.1, 60 sec: 41916.6, 300 sec: 42950.7). Total num frames: 1576452096. Throughput: 0: 10827.2. Samples: 144099378. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:03,977][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000192439_1576460288.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000191189_1566220288.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:04,576][626795] Updated weights for policy 0, policy_version 192442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:06,280][626795] Updated weights for policy 0, policy_version 192452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:07,959][626795] Updated weights for policy 0, policy_version 192462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:08,975][24592] Fps is (10 sec: 46694.1, 60 sec: 41915.8, 300 sec: 42987.2). Total num frames: 1576689664. Throughput: 0: 10941.6. Samples: 144170322. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:08,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:09,674][626795] Updated weights for policy 0, policy_version 192472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:11,447][626795] Updated weights for policy 0, policy_version 192482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:13,147][626795] Updated weights for policy 0, policy_version 192492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:13,976][24592] Fps is (10 sec: 47513.6, 60 sec: 41915.6, 300 sec: 43015.0). Total num frames: 1576927232. Throughput: 0: 10954.9. Samples: 144206040. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:13,976][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:14,901][626795] Updated weights for policy 0, policy_version 192502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:16,588][626795] Updated weights for policy 0, policy_version 192512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:18,270][626795] Updated weights for policy 0, policy_version 192522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:18,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44667.9, 300 sec: 43014.9). Total num frames: 1577172992. Throughput: 0: 10953.2. Samples: 144277338. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:18,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:20,045][626795] Updated weights for policy 0, policy_version 192532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:21,835][626795] Updated weights for policy 0, policy_version 192542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:23,506][626795] Updated weights for policy 0, policy_version 192552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:23,975][24592] Fps is (10 sec: 48333.4, 60 sec: 44782.9, 300 sec: 43014.9). Total num frames: 1577410560. Throughput: 0: 10987.0. Samples: 144349404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:23,977][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:25,154][626795] Updated weights for policy 0, policy_version 192562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:26,890][626795] Updated weights for policy 0, policy_version 192572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:28,643][626795] Updated weights for policy 0, policy_version 192582 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44782.9, 300 sec: 43042.7). Total num frames: 1577648128. Throughput: 0: 10981.1. Samples: 144385236. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:28,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:33,855][626795] Updated weights for policy 0, policy_version 192592 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:33,975][24592] Fps is (10 sec: 30310.5, 60 sec: 41915.9, 300 sec: 42431.8). Total num frames: 1577713664. Throughput: 0: 10380.5. Samples: 144429480. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:33,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:35,446][626795] Updated weights for policy 0, policy_version 192602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:37,293][626795] Updated weights for policy 0, policy_version 192612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:38,976][24592] Fps is (10 sec: 30308.5, 60 sec: 42058.5, 300 sec: 42431.7). Total num frames: 1577951232. Throughput: 0: 10049.3. Samples: 144486348. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:38,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:38,983][626795] Updated weights for policy 0, policy_version 192622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:40,737][626795] Updated weights for policy 0, policy_version 192632 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:42,562][626795] Updated weights for policy 0, policy_version 192642 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42052.3, 300 sec: 43014.9). Total num frames: 1578188800. Throughput: 0: 10573.1. Samples: 144521184. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:43,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:44,187][626795] Updated weights for policy 0, policy_version 192652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:45,958][626795] Updated weights for policy 0, policy_version 192662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:47,676][626795] Updated weights for policy 0, policy_version 192672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:48,976][24592] Fps is (10 sec: 47515.0, 60 sec: 41915.5, 300 sec: 43042.6). Total num frames: 1578426368. Throughput: 0: 10964.6. Samples: 144592788. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:48,976][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:49,369][626795] Updated weights for policy 0, policy_version 192682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:51,114][626795] Updated weights for policy 0, policy_version 192692 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:52,916][626795] Updated weights for policy 0, policy_version 192702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:53,976][24592] Fps is (10 sec: 47513.0, 60 sec: 42052.2, 300 sec: 43015.0). Total num frames: 1578663936. Throughput: 0: 10957.7. Samples: 144663420. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:53,977][24592] Avg episode reward: [(0, '5.020')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:54,660][626795] Updated weights for policy 0, policy_version 192712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:56,345][626795] Updated weights for policy 0, policy_version 192722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:58,096][626795] Updated weights for policy 0, policy_version 192732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:58,975][24592] Fps is (10 sec: 47514.8, 60 sec: 44646.3, 300 sec: 43014.9). Total num frames: 1578901504. Throughput: 0: 10953.7. Samples: 144698958. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:05:58,976][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:05:59,744][626795] Updated weights for policy 0, policy_version 192742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:01,416][626795] Updated weights for policy 0, policy_version 192752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:03,199][626795] Updated weights for policy 0, policy_version 192762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:03,975][24592] Fps is (10 sec: 47514.5, 60 sec: 44783.1, 300 sec: 43015.1). Total num frames: 1579139072. Throughput: 0: 10968.4. Samples: 144770916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:03,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:05,148][626795] Updated weights for policy 0, policy_version 192772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:09,512][24592] Fps is (10 sec: 31876.2, 60 sec: 42085.3, 300 sec: 42465.6). Total num frames: 1579237376. Throughput: 0: 10014.8. Samples: 144805446. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:09,513][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:10,135][626795] Updated weights for policy 0, policy_version 192782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:11,920][626795] Updated weights for policy 0, policy_version 192792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:13,607][626795] Updated weights for policy 0, policy_version 192802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:13,975][24592] Fps is (10 sec: 30310.2, 60 sec: 41915.8, 300 sec: 42431.8). Total num frames: 1579442176. Throughput: 0: 10008.3. Samples: 144835608. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:13,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:15,304][626795] Updated weights for policy 0, policy_version 192812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:17,108][626795] Updated weights for policy 0, policy_version 192822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:18,808][626795] Updated weights for policy 0, policy_version 192832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:18,975][24592] Fps is (10 sec: 47612.2, 60 sec: 41915.7, 300 sec: 43014.9). Total num frames: 1579687936. Throughput: 0: 10598.0. Samples: 144906390. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:18,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:20,505][626795] Updated weights for policy 0, policy_version 192842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:22,207][626795] Updated weights for policy 0, policy_version 192852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:23,923][626795] Updated weights for policy 0, policy_version 192862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:23,975][24592] Fps is (10 sec: 48332.2, 60 sec: 41915.7, 300 sec: 43015.0). Total num frames: 1579925504. Throughput: 0: 10939.7. Samples: 144978630. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:23,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:25,718][626795] Updated weights for policy 0, policy_version 192872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:27,414][626795] Updated weights for policy 0, policy_version 192882 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:28,975][24592] Fps is (10 sec: 47513.8, 60 sec: 41915.8, 300 sec: 43014.9). Total num frames: 1580163072. Throughput: 0: 10949.6. Samples: 145013916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:28,978][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:29,162][626795] Updated weights for policy 0, policy_version 192892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:30,732][626795] Updated weights for policy 0, policy_version 192902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:32,545][626795] Updated weights for policy 0, policy_version 192912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:33,976][24592] Fps is (10 sec: 47513.3, 60 sec: 44782.8, 300 sec: 43014.9). Total num frames: 1580400640. Throughput: 0: 10958.4. Samples: 145085916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:33,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:34,245][626795] Updated weights for policy 0, policy_version 192922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:35,958][626795] Updated weights for policy 0, policy_version 192932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:37,717][626795] Updated weights for policy 0, policy_version 192942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:38,975][24592] Fps is (10 sec: 47513.0, 60 sec: 44783.4, 300 sec: 43014.9). Total num frames: 1580638208. Throughput: 0: 10976.8. Samples: 145157376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:38,976][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:39,443][626795] Updated weights for policy 0, policy_version 192952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:41,190][626795] Updated weights for policy 0, policy_version 192962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:45,161][24592] Fps is (10 sec: 32957.0, 60 sec: 42174.6, 300 sec: 42483.2). Total num frames: 1580769280. Throughput: 0: 10688.5. Samples: 145192614. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:45,162][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:46,367][626795] Updated weights for policy 0, policy_version 192972 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:48,062][626795] Updated weights for policy 0, policy_version 192982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:48,976][24592] Fps is (10 sec: 31128.6, 60 sec: 42052.2, 300 sec: 42459.5). Total num frames: 1580949504. Throughput: 0: 10054.7. Samples: 145223382. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:48,977][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:49,723][626795] Updated weights for policy 0, policy_version 192992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:51,576][626795] Updated weights for policy 0, policy_version 193002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:53,228][626795] Updated weights for policy 0, policy_version 193012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:53,975][24592] Fps is (10 sec: 47399.3, 60 sec: 42052.4, 300 sec: 43010.4). Total num frames: 1581187072. Throughput: 0: 10985.2. Samples: 145293882. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:53,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:54,948][626795] Updated weights for policy 0, policy_version 193022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:56,727][626795] Updated weights for policy 0, policy_version 193032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:06:58,529][626795] Updated weights for policy 0, policy_version 193042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:58,975][24592] Fps is (10 sec: 46696.2, 60 sec: 41915.8, 300 sec: 42987.2). Total num frames: 1581416448. Throughput: 0: 10963.6. Samples: 145328970. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:06:58,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:00,158][626795] Updated weights for policy 0, policy_version 193052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:01,931][626795] Updated weights for policy 0, policy_version 193062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:03,769][626795] Updated weights for policy 0, policy_version 193072 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:03,976][24592] Fps is (10 sec: 47512.2, 60 sec: 42052.0, 300 sec: 43015.0). Total num frames: 1581662208. Throughput: 0: 10974.9. Samples: 145400262. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:03,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000193074_1581662208.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000191824_1571422208.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:05,330][626795] Updated weights for policy 0, policy_version 193082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:07,103][626795] Updated weights for policy 0, policy_version 193092 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:08,805][626795] Updated weights for policy 0, policy_version 193102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:08,975][24592] Fps is (10 sec: 48333.0, 60 sec: 44774.0, 300 sec: 43014.9). Total num frames: 1581899776. Throughput: 0: 10961.9. Samples: 145471914. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:08,976][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:10,502][626795] Updated weights for policy 0, policy_version 193112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:12,379][626795] Updated weights for policy 0, policy_version 193122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:13,975][24592] Fps is (10 sec: 46695.5, 60 sec: 44782.9, 300 sec: 42987.2). Total num frames: 1582129152. Throughput: 0: 10955.8. Samples: 145506930. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:13,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:13,984][626795] Updated weights for policy 0, policy_version 193132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:15,770][626795] Updated weights for policy 0, policy_version 193142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:20,870][24592] Fps is (10 sec: 33057.8, 60 sec: 42088.5, 300 sec: 42464.5). Total num frames: 1582292992. Throughput: 0: 9724.8. Samples: 145541958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:20,871][24592] Avg episode reward: [(0, '4.990')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:21,020][626795] Updated weights for policy 0, policy_version 193152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:22,793][626795] Updated weights for policy 0, policy_version 193162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:23,975][24592] Fps is (10 sec: 30310.8, 60 sec: 41779.4, 300 sec: 42404.0). Total num frames: 1582432256. Throughput: 0: 9997.0. Samples: 145607238. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:23,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:24,523][626795] Updated weights for policy 0, policy_version 193172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:26,205][626795] Updated weights for policy 0, policy_version 193182 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:27,849][626795] Updated weights for policy 0, policy_version 193192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:28,975][24592] Fps is (10 sec: 46492.3, 60 sec: 41779.1, 300 sec: 42981.0). Total num frames: 1582669824. Throughput: 0: 10279.8. Samples: 145643016. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:28,976][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:29,721][626795] Updated weights for policy 0, policy_version 193202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:31,425][626795] Updated weights for policy 0, policy_version 193212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:33,143][626795] Updated weights for policy 0, policy_version 193222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:33,975][24592] Fps is (10 sec: 47513.4, 60 sec: 41779.4, 300 sec: 42959.4). Total num frames: 1582907392. Throughput: 0: 10903.7. Samples: 145714044. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:33,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:34,803][626795] Updated weights for policy 0, policy_version 193232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:36,719][626795] Updated weights for policy 0, policy_version 193242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:38,361][626795] Updated weights for policy 0, policy_version 193252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:38,976][24592] Fps is (10 sec: 47512.7, 60 sec: 41779.1, 300 sec: 42959.4). Total num frames: 1583144960. Throughput: 0: 10876.5. Samples: 145783326. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:38,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:40,146][626795] Updated weights for policy 0, policy_version 193262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:41,863][626795] Updated weights for policy 0, policy_version 193272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:43,642][626795] Updated weights for policy 0, policy_version 193282 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44432.2, 300 sec: 42959.4). Total num frames: 1583382528. Throughput: 0: 10890.7. Samples: 145819050. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:43,977][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:45,334][626795] Updated weights for policy 0, policy_version 193292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:47,123][626795] Updated weights for policy 0, policy_version 193302 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:48,797][626795] Updated weights for policy 0, policy_version 193312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:48,975][24592] Fps is (10 sec: 46695.7, 60 sec: 44373.7, 300 sec: 42931.6). Total num frames: 1583611904. Throughput: 0: 10877.9. Samples: 145889766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:48,976][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:50,491][626795] Updated weights for policy 0, policy_version 193322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:52,338][626795] Updated weights for policy 0, policy_version 193332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:56,590][24592] Fps is (10 sec: 34417.7, 60 sec: 41996.9, 300 sec: 42444.4). Total num frames: 1583816704. Throughput: 0: 9526.6. Samples: 145925520. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:56,591][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:57,526][626795] Updated weights for policy 0, policy_version 193342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:58,975][24592] Fps is (10 sec: 31129.6, 60 sec: 41779.2, 300 sec: 42404.0). Total num frames: 1583923200. Throughput: 0: 9956.4. Samples: 145954968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:07:58,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:07:59,281][626795] Updated weights for policy 0, policy_version 193352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:01,022][626795] Updated weights for policy 0, policy_version 193362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:02,727][626795] Updated weights for policy 0, policy_version 193372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:03,975][24592] Fps is (10 sec: 46588.9, 60 sec: 41642.9, 300 sec: 42922.8). Total num frames: 1584160768. Throughput: 0: 11224.6. Samples: 146025798. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:03,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:04,418][626795] Updated weights for policy 0, policy_version 193382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:06,260][626795] Updated weights for policy 0, policy_version 193392 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:07,906][626795] Updated weights for policy 0, policy_version 193402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:08,978][24592] Fps is (10 sec: 46684.0, 60 sec: 41504.6, 300 sec: 42931.3). Total num frames: 1584390144. Throughput: 0: 10871.3. Samples: 146096472. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:08,979][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:09,649][626795] Updated weights for policy 0, policy_version 193412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:11,376][626795] Updated weights for policy 0, policy_version 193422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:13,058][626795] Updated weights for policy 0, policy_version 193432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 41779.3, 300 sec: 42959.4). Total num frames: 1584635904. Throughput: 0: 10877.1. Samples: 146132484. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:13,976][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:14,746][626795] Updated weights for policy 0, policy_version 193442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:16,533][626795] Updated weights for policy 0, policy_version 193452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:18,240][626795] Updated weights for policy 0, policy_version 193462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:18,976][24592] Fps is (10 sec: 48341.8, 60 sec: 44410.2, 300 sec: 42959.4). Total num frames: 1584873472. Throughput: 0: 10875.5. Samples: 146203446. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:18,977][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:19,985][626795] Updated weights for policy 0, policy_version 193472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:21,788][626795] Updated weights for policy 0, policy_version 193482 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:23,446][626795] Updated weights for policy 0, policy_version 193492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:23,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44509.8, 300 sec: 42931.6). Total num frames: 1585102848. Throughput: 0: 10918.2. Samples: 146274642. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:23,976][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:25,222][626795] Updated weights for policy 0, policy_version 193502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:27,022][626795] Updated weights for policy 0, policy_version 193512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:28,733][626795] Updated weights for policy 0, policy_version 193522 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:32,320][24592] Fps is (10 sec: 34377.8, 60 sec: 42030.4, 300 sec: 42422.9). Total num frames: 1585332224. Throughput: 0: 10148.6. Samples: 146309682. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:32,321][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:33,857][626795] Updated weights for policy 0, policy_version 193532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:33,975][24592] Fps is (10 sec: 31129.4, 60 sec: 41779.2, 300 sec: 42376.2). Total num frames: 1585414144. Throughput: 0: 9995.6. Samples: 146339568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:33,976][24592] Avg episode reward: [(0, '5.090')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:35,546][626795] Updated weights for policy 0, policy_version 193542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:37,306][626795] Updated weights for policy 0, policy_version 193552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:38,975][24592] Fps is (10 sec: 48005.7, 60 sec: 41779.4, 300 sec: 42891.4). Total num frames: 1585651712. Throughput: 0: 11456.1. Samples: 146411088. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:38,979][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:39,040][626795] Updated weights for policy 0, policy_version 193562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:40,718][626795] Updated weights for policy 0, policy_version 193572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:42,503][626795] Updated weights for policy 0, policy_version 193582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 41779.2, 300 sec: 42931.6). Total num frames: 1585889280. Throughput: 0: 10918.2. Samples: 146446290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:43,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:44,272][626795] Updated weights for policy 0, policy_version 193592 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:46,028][626795] Updated weights for policy 0, policy_version 193602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:47,719][626795] Updated weights for policy 0, policy_version 193612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:48,975][24592] Fps is (10 sec: 47513.7, 60 sec: 41915.8, 300 sec: 42931.7). Total num frames: 1586126848. Throughput: 0: 10921.3. Samples: 146517258. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:48,976][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:49,398][626795] Updated weights for policy 0, policy_version 193622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:51,100][626795] Updated weights for policy 0, policy_version 193632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:52,899][626795] Updated weights for policy 0, policy_version 193642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:53,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44396.7, 300 sec: 42931.6). Total num frames: 1586364416. Throughput: 0: 10935.5. Samples: 146588544. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:53,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:54,562][626795] Updated weights for policy 0, policy_version 193652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:56,280][626795] Updated weights for policy 0, policy_version 193662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:58,061][626795] Updated weights for policy 0, policy_version 193672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,104][626772] Signal inference workers to stop experience collection... (2050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,105][626772] Signal inference workers to resume experience collection... (2050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,110][626795] InferenceWorker_p0-w0: stopping experience collection (2050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,115][626795] InferenceWorker_p0-w0: resuming experience collection (2050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44646.4, 300 sec: 42931.9). Total num frames: 1586601984. Throughput: 0: 10921.4. Samples: 146623944. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:08:58,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:08:59,817][626795] Updated weights for policy 0, policy_version 193682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:01,571][626795] Updated weights for policy 0, policy_version 193692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:03,356][626795] Updated weights for policy 0, policy_version 193702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:03,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44509.9, 300 sec: 42903.9). Total num frames: 1586831360. Throughput: 0: 10915.6. Samples: 146694642. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:03,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:04,008][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000193706_1586839552.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:04,053][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000192439_1576460288.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:08,573][626795] Updated weights for policy 0, policy_version 193712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:08,975][24592] Fps is (10 sec: 30310.1, 60 sec: 41917.3, 300 sec: 42348.5). Total num frames: 1586905088. Throughput: 0: 9979.3. Samples: 146723712. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:08,976][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:10,360][626795] Updated weights for policy 0, policy_version 193722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:11,995][626795] Updated weights for policy 0, policy_version 193732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:13,751][626795] Updated weights for policy 0, policy_version 193742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:13,982][24592] Fps is (10 sec: 31110.3, 60 sec: 41774.9, 300 sec: 42859.2). Total num frames: 1587142656. Throughput: 0: 10792.0. Samples: 146759292. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:13,985][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:15,533][626795] Updated weights for policy 0, policy_version 193752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:17,169][626795] Updated weights for policy 0, policy_version 193762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:18,886][626795] Updated weights for policy 0, policy_version 193772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:18,976][24592] Fps is (10 sec: 47511.8, 60 sec: 41779.2, 300 sec: 42903.8). Total num frames: 1587380224. Throughput: 0: 10910.6. Samples: 146830548. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:18,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:20,563][626795] Updated weights for policy 0, policy_version 193782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:22,373][626795] Updated weights for policy 0, policy_version 193792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:23,978][24592] Fps is (10 sec: 47529.2, 60 sec: 41913.7, 300 sec: 42903.4). Total num frames: 1587617792. Throughput: 0: 10914.5. Samples: 146902272. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:23,979][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:24,100][626795] Updated weights for policy 0, policy_version 193802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:25,789][626795] Updated weights for policy 0, policy_version 193812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:27,490][626795] Updated weights for policy 0, policy_version 193822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:28,976][24592] Fps is (10 sec: 47514.7, 60 sec: 44534.8, 300 sec: 42903.9). Total num frames: 1587855360. Throughput: 0: 10926.5. Samples: 146937984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:28,978][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:29,205][626795] Updated weights for policy 0, policy_version 193832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:30,928][626795] Updated weights for policy 0, policy_version 193842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:32,603][626795] Updated weights for policy 0, policy_version 193852 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:33,975][24592] Fps is (10 sec: 47527.4, 60 sec: 44646.4, 300 sec: 42933.0). Total num frames: 1588092928. Throughput: 0: 10947.6. Samples: 147009900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:33,976][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:34,351][626795] Updated weights for policy 0, policy_version 193862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:36,116][626795] Updated weights for policy 0, policy_version 193872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:37,830][626795] Updated weights for policy 0, policy_version 193882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:38,975][24592] Fps is (10 sec: 47514.2, 60 sec: 44646.4, 300 sec: 42931.6). Total num frames: 1588330496. Throughput: 0: 10938.5. Samples: 147080778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:38,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:39,523][626795] Updated weights for policy 0, policy_version 193892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:43,975][24592] Fps is (10 sec: 31129.8, 60 sec: 41915.8, 300 sec: 42348.5). Total num frames: 1588404224. Throughput: 0: 10598.5. Samples: 147100878. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:43,977][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:44,837][626795] Updated weights for policy 0, policy_version 193902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:46,407][626795] Updated weights for policy 0, policy_version 193912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:48,240][626795] Updated weights for policy 0, policy_version 193922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:48,975][24592] Fps is (10 sec: 31129.4, 60 sec: 41915.6, 300 sec: 42376.2). Total num frames: 1588641792. Throughput: 0: 10033.2. Samples: 147146136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:48,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:49,924][626795] Updated weights for policy 0, policy_version 193932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:51,675][626795] Updated weights for policy 0, policy_version 193942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:53,474][626795] Updated weights for policy 0, policy_version 193952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 41915.8, 300 sec: 42903.9). Total num frames: 1588879360. Throughput: 0: 10954.9. Samples: 147216684. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:53,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:55,268][626795] Updated weights for policy 0, policy_version 193962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:56,959][626795] Updated weights for policy 0, policy_version 193972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:09:58,577][626795] Updated weights for policy 0, policy_version 193982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:58,976][24592] Fps is (10 sec: 46694.0, 60 sec: 41779.0, 300 sec: 42903.9). Total num frames: 1589108736. Throughput: 0: 10942.8. Samples: 147251652. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:09:58,978][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:00,396][626795] Updated weights for policy 0, policy_version 193992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:02,231][626795] Updated weights for policy 0, policy_version 194002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:03,975][24592] Fps is (10 sec: 45874.9, 60 sec: 41779.2, 300 sec: 42876.1). Total num frames: 1589338112. Throughput: 0: 10913.7. Samples: 147321660. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:03,978][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:04,011][626795] Updated weights for policy 0, policy_version 194012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:05,712][626795] Updated weights for policy 0, policy_version 194022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:07,412][626795] Updated weights for policy 0, policy_version 194032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:08,975][24592] Fps is (10 sec: 47514.5, 60 sec: 44646.4, 300 sec: 42903.9). Total num frames: 1589583872. Throughput: 0: 10897.0. Samples: 147392604. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:08,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:09,174][626795] Updated weights for policy 0, policy_version 194042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:10,884][626795] Updated weights for policy 0, policy_version 194052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:12,722][626795] Updated weights for policy 0, policy_version 194062 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:13,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44514.5, 300 sec: 42848.3). Total num frames: 1589813248. Throughput: 0: 10876.0. Samples: 147427404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:13,976][24592] Avg episode reward: [(0, '4.439')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:14,361][626795] Updated weights for policy 0, policy_version 194072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:19,549][24592] Fps is (10 sec: 30990.1, 60 sec: 41789.5, 300 sec: 42294.0). Total num frames: 1589911552. Throughput: 0: 9940.7. Samples: 147462936. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:19,551][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:19,695][626795] Updated weights for policy 0, policy_version 194082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:21,325][626795] Updated weights for policy 0, policy_version 194092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:23,021][626795] Updated weights for policy 0, policy_version 194102 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:23,976][24592] Fps is (10 sec: 30309.2, 60 sec: 41644.4, 300 sec: 42265.1). Total num frames: 1590116352. Throughput: 0: 9934.1. Samples: 147527814. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:23,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:24,880][626795] Updated weights for policy 0, policy_version 194112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:26,664][626795] Updated weights for policy 0, policy_version 194122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:28,321][626795] Updated weights for policy 0, policy_version 194132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:28,975][24592] Fps is (10 sec: 46928.8, 60 sec: 41642.7, 300 sec: 42848.3). Total num frames: 1590353920. Throughput: 0: 10276.5. Samples: 147563322. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:28,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:30,038][626795] Updated weights for policy 0, policy_version 194142 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:31,807][626795] Updated weights for policy 0, policy_version 194152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:33,493][626795] Updated weights for policy 0, policy_version 194162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:33,975][24592] Fps is (10 sec: 48334.8, 60 sec: 41779.2, 300 sec: 42876.2). Total num frames: 1590599680. Throughput: 0: 10854.6. Samples: 147634590. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:33,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:35,227][626795] Updated weights for policy 0, policy_version 194172 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:36,904][626795] Updated weights for policy 0, policy_version 194182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:38,679][626795] Updated weights for policy 0, policy_version 194192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:38,975][24592] Fps is (10 sec: 47514.0, 60 sec: 41642.7, 300 sec: 42848.3). Total num frames: 1590829056. Throughput: 0: 10871.2. Samples: 147705888. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:38,976][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:40,423][626795] Updated weights for policy 0, policy_version 194202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:42,089][626795] Updated weights for policy 0, policy_version 194212 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:43,823][626795] Updated weights for policy 0, policy_version 194222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:43,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44373.3, 300 sec: 42848.4). Total num frames: 1591066624. Throughput: 0: 10880.8. Samples: 147741288. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:43,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:45,650][626795] Updated weights for policy 0, policy_version 194232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:47,325][626795] Updated weights for policy 0, policy_version 194242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:48,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44373.4, 300 sec: 42848.4). Total num frames: 1591304192. Throughput: 0: 10902.4. Samples: 147812268. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:48,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:49,143][626795] Updated weights for policy 0, policy_version 194252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:50,815][626795] Updated weights for policy 0, policy_version 194262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:55,261][24592] Fps is (10 sec: 32663.5, 60 sec: 41704.5, 300 sec: 42302.9). Total num frames: 1591435264. Throughput: 0: 9823.5. Samples: 147847296. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:55,262][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:56,107][626795] Updated weights for policy 0, policy_version 194272 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:57,819][626795] Updated weights for policy 0, policy_version 194282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:58,975][24592] Fps is (10 sec: 30310.3, 60 sec: 41642.8, 300 sec: 42265.2). Total num frames: 1591607296. Throughput: 0: 9981.7. Samples: 147876582. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:10:58,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:10:59,640][626795] Updated weights for policy 0, policy_version 194292 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:01,311][626795] Updated weights for policy 0, policy_version 194302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:03,125][626795] Updated weights for policy 0, policy_version 194312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:03,976][24592] Fps is (10 sec: 46063.7, 60 sec: 41642.6, 300 sec: 42787.3). Total num frames: 1591836672. Throughput: 0: 10865.4. Samples: 147945648. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:03,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000194317_1591844864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:04,077][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000193074_1581662208.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:04,992][626795] Updated weights for policy 0, policy_version 194322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:06,702][626795] Updated weights for policy 0, policy_version 194332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:08,343][626795] Updated weights for policy 0, policy_version 194342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:08,976][24592] Fps is (10 sec: 46690.4, 60 sec: 41505.5, 300 sec: 42820.4). Total num frames: 1592074240. Throughput: 0: 10853.4. Samples: 148016220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:08,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:10,142][626795] Updated weights for policy 0, policy_version 194352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:11,761][626795] Updated weights for policy 0, policy_version 194362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:13,535][626795] Updated weights for policy 0, policy_version 194372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:13,975][24592] Fps is (10 sec: 47514.4, 60 sec: 41642.7, 300 sec: 42792.8). Total num frames: 1592311808. Throughput: 0: 10860.4. Samples: 148052040. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:13,977][24592] Avg episode reward: [(0, '4.970')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:15,291][626795] Updated weights for policy 0, policy_version 194382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:16,948][626795] Updated weights for policy 0, policy_version 194392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:18,698][626795] Updated weights for policy 0, policy_version 194402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:18,976][24592] Fps is (10 sec: 47516.6, 60 sec: 44388.0, 300 sec: 42792.8). Total num frames: 1592549376. Throughput: 0: 10855.8. Samples: 148123104. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:18,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:20,380][626795] Updated weights for policy 0, policy_version 194412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:22,140][626795] Updated weights for policy 0, policy_version 194422 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:23,886][626795] Updated weights for policy 0, policy_version 194432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:23,976][24592] Fps is (10 sec: 47511.9, 60 sec: 44509.9, 300 sec: 42792.7). Total num frames: 1592786944. Throughput: 0: 10869.8. Samples: 148195032. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:23,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:25,590][626795] Updated weights for policy 0, policy_version 194442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:27,192][626795] Updated weights for policy 0, policy_version 194452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:30,783][24592] Fps is (10 sec: 34690.4, 60 sec: 42147.9, 300 sec: 42311.4). Total num frames: 1592958976. Throughput: 0: 10465.0. Samples: 148231128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:30,784][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:32,337][626795] Updated weights for policy 0, policy_version 194462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:33,976][24592] Fps is (10 sec: 31948.4, 60 sec: 41778.8, 300 sec: 42265.1). Total num frames: 1593106432. Throughput: 0: 10009.9. Samples: 148262718. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:33,978][24592] Avg episode reward: [(0, '4.933')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:34,097][626795] Updated weights for policy 0, policy_version 194472 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:35,852][626795] Updated weights for policy 0, policy_version 194482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:37,643][626795] Updated weights for policy 0, policy_version 194492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:38,975][24592] Fps is (10 sec: 45997.2, 60 sec: 41779.2, 300 sec: 42770.3). Total num frames: 1593335808. Throughput: 0: 11093.4. Samples: 148332234. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:38,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:39,474][626795] Updated weights for policy 0, policy_version 194502 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:41,196][626795] Updated weights for policy 0, policy_version 194512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:42,982][626795] Updated weights for policy 0, policy_version 194522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:43,975][24592] Fps is (10 sec: 46696.5, 60 sec: 41779.2, 300 sec: 42792.8). Total num frames: 1593573376. Throughput: 0: 10889.1. Samples: 148366590. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:43,977][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:44,490][626795] Updated weights for policy 0, policy_version 194532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:46,328][626795] Updated weights for policy 0, policy_version 194542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:47,944][626795] Updated weights for policy 0, policy_version 194552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:48,976][24592] Fps is (10 sec: 48331.9, 60 sec: 41915.6, 300 sec: 42820.5). Total num frames: 1593819136. Throughput: 0: 10957.7. Samples: 148438746. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:48,978][24592] Avg episode reward: [(0, '4.986')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:49,673][626795] Updated weights for policy 0, policy_version 194562 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:51,477][626795] Updated weights for policy 0, policy_version 194572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:53,222][626795] Updated weights for policy 0, policy_version 194582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:53,975][24592] Fps is (10 sec: 47513.2, 60 sec: 44508.0, 300 sec: 42820.5). Total num frames: 1594048512. Throughput: 0: 10968.0. Samples: 148509774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:53,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:54,860][626795] Updated weights for policy 0, policy_version 194592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:56,642][626795] Updated weights for policy 0, policy_version 194602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:58,323][626795] Updated weights for policy 0, policy_version 194612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:58,975][24592] Fps is (10 sec: 46695.0, 60 sec: 44646.4, 300 sec: 42792.8). Total num frames: 1594286080. Throughput: 0: 10974.7. Samples: 148545900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:11:58,977][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:11:59,990][626795] Updated weights for policy 0, policy_version 194622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:01,760][626795] Updated weights for policy 0, policy_version 194632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:06,375][24592] Fps is (10 sec: 35014.4, 60 sec: 42404.2, 300 sec: 42309.7). Total num frames: 1594482688. Throughput: 0: 10440.2. Samples: 148617966. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:06,377][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:06,780][626795] Updated weights for policy 0, policy_version 194642 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:08,532][626795] Updated weights for policy 0, policy_version 194652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:08,975][24592] Fps is (10 sec: 31948.5, 60 sec: 42189.3, 300 sec: 42292.9). Total num frames: 1594605568. Throughput: 0: 10100.2. Samples: 148649538. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:08,976][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:10,244][626795] Updated weights for policy 0, policy_version 194662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:12,047][626795] Updated weights for policy 0, policy_version 194672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:13,853][626795] Updated weights for policy 0, policy_version 194682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:13,976][24592] Fps is (10 sec: 46344.6, 60 sec: 42051.7, 300 sec: 42789.8). Total num frames: 1594834944. Throughput: 0: 10477.2. Samples: 148683672. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:13,980][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:15,619][626795] Updated weights for policy 0, policy_version 194692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:17,295][626795] Updated weights for policy 0, policy_version 194702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:18,975][24592] Fps is (10 sec: 46695.2, 60 sec: 42052.4, 300 sec: 42848.3). Total num frames: 1595072512. Throughput: 0: 10919.2. Samples: 148754076. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:18,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:19,001][626795] Updated weights for policy 0, policy_version 194712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:20,782][626795] Updated weights for policy 0, policy_version 194722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:22,459][626795] Updated weights for policy 0, policy_version 194732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:23,975][24592] Fps is (10 sec: 47517.0, 60 sec: 42052.4, 300 sec: 42848.3). Total num frames: 1595310080. Throughput: 0: 10968.6. Samples: 148825824. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:23,978][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:24,241][626795] Updated weights for policy 0, policy_version 194742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:25,829][626795] Updated weights for policy 0, policy_version 194752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:27,624][626795] Updated weights for policy 0, policy_version 194762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:28,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44484.6, 300 sec: 42848.3). Total num frames: 1595547648. Throughput: 0: 11004.1. Samples: 148861776. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:28,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:29,343][626795] Updated weights for policy 0, policy_version 194772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:31,088][626795] Updated weights for policy 0, policy_version 194782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:32,790][626795] Updated weights for policy 0, policy_version 194792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:33,975][24592] Fps is (10 sec: 48333.3, 60 sec: 44783.3, 300 sec: 42876.1). Total num frames: 1595793408. Throughput: 0: 10994.3. Samples: 148933488. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:33,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:34,456][626795] Updated weights for policy 0, policy_version 194802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:36,115][626795] Updated weights for policy 0, policy_version 194812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:37,876][626795] Updated weights for policy 0, policy_version 194822 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:41,858][24592] Fps is (10 sec: 36246.8, 60 sec: 42599.9, 300 sec: 42406.2). Total num frames: 1596014592. Throughput: 0: 10349.6. Samples: 149005338. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:41,859][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:42,891][626795] Updated weights for policy 0, policy_version 194832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:43,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42188.8, 300 sec: 42348.5). Total num frames: 1596104704. Throughput: 0: 10216.5. Samples: 149005644. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:43,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:44,719][626795] Updated weights for policy 0, policy_version 194842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:46,528][626795] Updated weights for policy 0, policy_version 194852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:48,347][626795] Updated weights for policy 0, policy_version 194862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:48,976][24592] Fps is (10 sec: 43730.7, 60 sec: 41778.7, 300 sec: 42783.1). Total num frames: 1596325888. Throughput: 0: 10610.9. Samples: 149070000. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:48,977][24592] Avg episode reward: [(0, '5.092')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:50,286][626795] Updated weights for policy 0, policy_version 194872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:52,093][626795] Updated weights for policy 0, policy_version 194882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:53,918][626795] Updated weights for policy 0, policy_version 194892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:53,975][24592] Fps is (10 sec: 45055.9, 60 sec: 41779.2, 300 sec: 42820.5). Total num frames: 1596555264. Throughput: 0: 10832.7. Samples: 149137008. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:53,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:55,656][626795] Updated weights for policy 0, policy_version 194902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:57,284][626795] Updated weights for policy 0, policy_version 194912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:12:58,951][626795] Updated weights for policy 0, policy_version 194922 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:58,976][24592] Fps is (10 sec: 47516.9, 60 sec: 41915.6, 300 sec: 42848.3). Total num frames: 1596801024. Throughput: 0: 10858.2. Samples: 149172282. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:12:58,978][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:00,758][626795] Updated weights for policy 0, policy_version 194932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:02,554][626795] Updated weights for policy 0, policy_version 194942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:03,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44231.1, 300 sec: 42848.6). Total num frames: 1597030400. Throughput: 0: 10895.7. Samples: 149244384. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:03,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:04,026][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000194951_1597038592.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:04,102][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000193706_1586839552.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:04,146][626795] Updated weights for policy 0, policy_version 194952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:05,977][626795] Updated weights for policy 0, policy_version 194962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:07,650][626795] Updated weights for policy 0, policy_version 194972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:08,976][24592] Fps is (10 sec: 46695.2, 60 sec: 44373.4, 300 sec: 42820.6). Total num frames: 1597267968. Throughput: 0: 10876.8. Samples: 149315280. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:08,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:09,399][626795] Updated weights for policy 0, policy_version 194982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:11,151][626795] Updated weights for policy 0, policy_version 194992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:12,846][626795] Updated weights for policy 0, policy_version 195002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:13,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44510.5, 300 sec: 42820.6). Total num frames: 1597505536. Throughput: 0: 10867.7. Samples: 149350824. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:13,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:17,308][626795] Updated weights for policy 0, policy_version 195012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:18,975][24592] Fps is (10 sec: 34406.1, 60 sec: 42325.3, 300 sec: 42404.0). Total num frames: 1597612032. Throughput: 0: 10127.7. Samples: 149389236. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:18,976][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:19,039][626795] Updated weights for policy 0, policy_version 195022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:20,711][626795] Updated weights for policy 0, policy_version 195032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:22,480][626795] Updated weights for policy 0, policy_version 195042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:23,975][24592] Fps is (10 sec: 35225.6, 60 sec: 42462.0, 300 sec: 42946.5). Total num frames: 1597857792. Throughput: 0: 10814.0. Samples: 149460798. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:23,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:24,101][626795] Updated weights for policy 0, policy_version 195052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:25,882][626795] Updated weights for policy 0, policy_version 195062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:27,661][626795] Updated weights for policy 0, policy_version 195072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:28,975][24592] Fps is (10 sec: 48333.4, 60 sec: 42461.9, 300 sec: 42987.2). Total num frames: 1598095360. Throughput: 0: 10909.5. Samples: 149496570. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:28,976][24592] Avg episode reward: [(0, '4.298')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:29,323][626795] Updated weights for policy 0, policy_version 195082 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:31,025][626795] Updated weights for policy 0, policy_version 195092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:32,779][626795] Updated weights for policy 0, policy_version 195102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:33,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.3, 300 sec: 42987.2). Total num frames: 1598332928. Throughput: 0: 11081.9. Samples: 149568678. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:33,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:34,480][626795] Updated weights for policy 0, policy_version 195112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:36,166][626795] Updated weights for policy 0, policy_version 195122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:37,845][626795] Updated weights for policy 0, policy_version 195132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:38,984][24592] Fps is (10 sec: 47472.7, 60 sec: 44741.4, 300 sec: 42985.9). Total num frames: 1598570496. Throughput: 0: 11190.3. Samples: 149640666. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:38,985][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:39,589][626795] Updated weights for policy 0, policy_version 195142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:41,330][626795] Updated weights for policy 0, policy_version 195152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:42,984][626795] Updated weights for policy 0, policy_version 195162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:43,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45192.6, 300 sec: 43014.9). Total num frames: 1598816256. Throughput: 0: 11191.1. Samples: 149675880. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:43,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:44,640][626795] Updated weights for policy 0, policy_version 195172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:46,450][626795] Updated weights for policy 0, policy_version 195182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:48,181][626795] Updated weights for policy 0, policy_version 195192 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:48,976][24592] Fps is (10 sec: 47553.0, 60 sec: 45329.5, 300 sec: 42987.1). Total num frames: 1599045632. Throughput: 0: 11199.0. Samples: 149748342. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:48,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:53,302][626795] Updated weights for policy 0, policy_version 195202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:53,975][24592] Fps is (10 sec: 30310.2, 60 sec: 42735.0, 300 sec: 42431.8). Total num frames: 1599119360. Throughput: 0: 10257.5. Samples: 149776866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:53,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:55,308][626795] Updated weights for policy 0, policy_version 195212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:57,164][626795] Updated weights for policy 0, policy_version 195222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:13:58,895][626795] Updated weights for policy 0, policy_version 195232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:58,976][24592] Fps is (10 sec: 29490.4, 60 sec: 42325.1, 300 sec: 42403.9). Total num frames: 1599340544. Throughput: 0: 10199.6. Samples: 149809812. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:13:58,979][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:00,814][626795] Updated weights for policy 0, policy_version 195242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:02,733][626795] Updated weights for policy 0, policy_version 195252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:03,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42052.2, 300 sec: 42876.1). Total num frames: 1599553536. Throughput: 0: 10809.3. Samples: 149875656. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:03,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:04,655][626795] Updated weights for policy 0, policy_version 195262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:06,424][626795] Updated weights for policy 0, policy_version 195272 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:08,335][626795] Updated weights for policy 0, policy_version 195282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:08,976][24592] Fps is (10 sec: 43417.5, 60 sec: 41778.8, 300 sec: 42821.4). Total num frames: 1599774720. Throughput: 0: 10704.5. Samples: 149942508. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:08,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:10,046][626795] Updated weights for policy 0, policy_version 195292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:11,672][626795] Updated weights for policy 0, policy_version 195302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:13,391][626795] Updated weights for policy 0, policy_version 195312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:13,975][24592] Fps is (10 sec: 45875.8, 60 sec: 41779.2, 300 sec: 42820.6). Total num frames: 1600012288. Throughput: 0: 10705.5. Samples: 149978316. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:13,976][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:15,140][626795] Updated weights for policy 0, policy_version 195322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:16,802][626795] Updated weights for policy 0, policy_version 195332 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:18,435][626795] Updated weights for policy 0, policy_version 195342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:18,975][24592] Fps is (10 sec: 48335.4, 60 sec: 44100.3, 300 sec: 42848.7). Total num frames: 1600258048. Throughput: 0: 10709.5. Samples: 150050604. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:18,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:20,313][626795] Updated weights for policy 0, policy_version 195352 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:21,891][626795] Updated weights for policy 0, policy_version 195362 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:23,621][626795] Updated weights for policy 0, policy_version 195372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:23,975][24592] Fps is (10 sec: 49151.9, 60 sec: 44100.3, 300 sec: 42876.1). Total num frames: 1600503808. Throughput: 0: 10710.8. Samples: 150122562. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:23,978][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:27,570][626795] Updated weights for policy 0, policy_version 195382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:28,975][24592] Fps is (10 sec: 37683.4, 60 sec: 42325.3, 300 sec: 42515.1). Total num frames: 1600634880. Throughput: 0: 10266.4. Samples: 150137868. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:28,977][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:29,169][626795] Updated weights for policy 0, policy_version 195392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:30,965][626795] Updated weights for policy 0, policy_version 195402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:32,664][626795] Updated weights for policy 0, policy_version 195412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:33,975][24592] Fps is (10 sec: 36864.1, 60 sec: 42325.4, 300 sec: 42515.1). Total num frames: 1600872448. Throughput: 0: 10139.7. Samples: 150204624. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:33,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:34,379][626795] Updated weights for policy 0, policy_version 195422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:36,078][626795] Updated weights for policy 0, policy_version 195432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:37,797][626795] Updated weights for policy 0, policy_version 195442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:38,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42331.4, 300 sec: 43070.5). Total num frames: 1601110016. Throughput: 0: 11095.7. Samples: 150276174. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:38,978][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:39,419][626795] Updated weights for policy 0, policy_version 195452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:41,228][626795] Updated weights for policy 0, policy_version 195462 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:42,935][626795] Updated weights for policy 0, policy_version 195472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:43,976][24592] Fps is (10 sec: 48331.9, 60 sec: 42325.2, 300 sec: 43098.2). Total num frames: 1601355776. Throughput: 0: 11164.1. Samples: 150312192. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:43,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:44,598][626795] Updated weights for policy 0, policy_version 195482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:46,404][626795] Updated weights for policy 0, policy_version 195492 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:48,003][626795] Updated weights for policy 0, policy_version 195502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:48,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42462.1, 300 sec: 43098.2). Total num frames: 1601593344. Throughput: 0: 11289.6. Samples: 150383688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:48,977][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:49,736][626795] Updated weights for policy 0, policy_version 195512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:51,487][626795] Updated weights for policy 0, policy_version 195522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:53,222][626795] Updated weights for policy 0, policy_version 195532 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:53,975][24592] Fps is (10 sec: 47514.1, 60 sec: 45192.5, 300 sec: 43126.0). Total num frames: 1601830912. Throughput: 0: 11391.1. Samples: 150455100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:53,976][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:54,938][626795] Updated weights for policy 0, policy_version 195542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:56,686][626795] Updated weights for policy 0, policy_version 195552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:14:58,448][626795] Updated weights for policy 0, policy_version 195562 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:58,975][24592] Fps is (10 sec: 46694.1, 60 sec: 45329.4, 300 sec: 43126.0). Total num frames: 1602060288. Throughput: 0: 11378.8. Samples: 150490362. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:14:58,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:03,723][626795] Updated weights for policy 0, policy_version 195572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:03,976][24592] Fps is (10 sec: 30310.1, 60 sec: 43008.0, 300 sec: 42542.8). Total num frames: 1602134016. Throughput: 0: 10480.0. Samples: 150522204. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:03,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000195573_1602134016.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:04,060][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000194317_1591844864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:05,735][626795] Updated weights for policy 0, policy_version 195582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:07,605][626795] Updated weights for policy 0, policy_version 195592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:08,975][24592] Fps is (10 sec: 28672.3, 60 sec: 42871.9, 300 sec: 42487.3). Total num frames: 1602347008. Throughput: 0: 10280.8. Samples: 150585198. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:08,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:09,440][626795] Updated weights for policy 0, policy_version 195602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:11,286][626795] Updated weights for policy 0, policy_version 195612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:13,125][626795] Updated weights for policy 0, policy_version 195622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:13,975][24592] Fps is (10 sec: 43418.0, 60 sec: 42598.4, 300 sec: 42987.5). Total num frames: 1602568192. Throughput: 0: 10656.7. Samples: 150617418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:13,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:15,047][626795] Updated weights for policy 0, policy_version 195632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:16,730][626795] Updated weights for policy 0, policy_version 195642 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:18,446][626795] Updated weights for policy 0, policy_version 195652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:18,975][24592] Fps is (10 sec: 45875.2, 60 sec: 42461.9, 300 sec: 43015.0). Total num frames: 1602805760. Throughput: 0: 10697.6. Samples: 150686016. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:18,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:20,181][626795] Updated weights for policy 0, policy_version 195662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:21,893][626795] Updated weights for policy 0, policy_version 195672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:23,668][626795] Updated weights for policy 0, policy_version 195682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:23,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42325.3, 300 sec: 43014.9). Total num frames: 1603043328. Throughput: 0: 10712.9. Samples: 150758256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:23,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:25,309][626795] Updated weights for policy 0, policy_version 195692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:27,047][626795] Updated weights for policy 0, policy_version 195702 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:28,801][626795] Updated weights for policy 0, policy_version 195712 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:28,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44100.3, 300 sec: 42987.2). Total num frames: 1603280896. Throughput: 0: 10690.7. Samples: 150793272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:28,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:30,445][626795] Updated weights for policy 0, policy_version 195722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:32,230][626795] Updated weights for policy 0, policy_version 195732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:33,884][626795] Updated weights for policy 0, policy_version 195742 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:33,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44100.2, 300 sec: 43014.9). Total num frames: 1603518464. Throughput: 0: 10699.9. Samples: 150865182. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:33,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:38,133][626795] Updated weights for policy 0, policy_version 195752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:38,975][24592] Fps is (10 sec: 36044.6, 60 sec: 42188.8, 300 sec: 42626.2). Total num frames: 1603641344. Throughput: 0: 10044.3. Samples: 150907092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:38,978][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:39,834][626795] Updated weights for policy 0, policy_version 195762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:41,585][626795] Updated weights for policy 0, policy_version 195772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:43,266][626795] Updated weights for policy 0, policy_version 195782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:43,975][24592] Fps is (10 sec: 36044.9, 60 sec: 42052.4, 300 sec: 42626.2). Total num frames: 1603878912. Throughput: 0: 10055.8. Samples: 150942870. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:43,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:45,075][626795] Updated weights for policy 0, policy_version 195792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:46,592][626795] Updated weights for policy 0, policy_version 195802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:48,410][626795] Updated weights for policy 0, policy_version 195812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:48,976][24592] Fps is (10 sec: 46692.8, 60 sec: 41915.5, 300 sec: 43147.4). Total num frames: 1604108288. Throughput: 0: 10941.9. Samples: 151014594. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:48,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:50,217][626795] Updated weights for policy 0, policy_version 195822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:51,876][626795] Updated weights for policy 0, policy_version 195832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:53,682][626795] Updated weights for policy 0, policy_version 195842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:53,975][24592] Fps is (10 sec: 46694.3, 60 sec: 41915.7, 300 sec: 43181.6). Total num frames: 1604345856. Throughput: 0: 11095.2. Samples: 151084482. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:53,976][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:55,424][626795] Updated weights for policy 0, policy_version 195852 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:57,082][626795] Updated weights for policy 0, policy_version 195862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:15:58,785][626795] Updated weights for policy 0, policy_version 195872 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:58,975][24592] Fps is (10 sec: 47515.4, 60 sec: 42052.3, 300 sec: 43209.4). Total num frames: 1604583424. Throughput: 0: 11180.1. Samples: 151120524. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:15:58,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:00,522][626795] Updated weights for policy 0, policy_version 195882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:02,173][626795] Updated weights for policy 0, policy_version 195892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:03,930][626795] Updated weights for policy 0, policy_version 195902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:03,975][24592] Fps is (10 sec: 48333.1, 60 sec: 44919.6, 300 sec: 43237.2). Total num frames: 1604829184. Throughput: 0: 11258.5. Samples: 151192650. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:03,976][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:05,629][626795] Updated weights for policy 0, policy_version 195912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:07,400][626795] Updated weights for policy 0, policy_version 195922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:08,975][24592] Fps is (10 sec: 48332.3, 60 sec: 45329.0, 300 sec: 43237.1). Total num frames: 1605066752. Throughput: 0: 11249.3. Samples: 151264476. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:08,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:09,140][626795] Updated weights for policy 0, policy_version 195932 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:13,976][24592] Fps is (10 sec: 30307.9, 60 sec: 42734.4, 300 sec: 42653.9). Total num frames: 1605132288. Throughput: 0: 10759.5. Samples: 151277460. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:13,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:14,518][626795] Updated weights for policy 0, policy_version 195942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:16,240][626795] Updated weights for policy 0, policy_version 195952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:18,191][626795] Updated weights for policy 0, policy_version 195962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:18,975][24592] Fps is (10 sec: 28672.2, 60 sec: 42461.9, 300 sec: 42598.5). Total num frames: 1605353472. Throughput: 0: 10209.9. Samples: 151324626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:18,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:20,056][626795] Updated weights for policy 0, policy_version 195972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:21,775][626795] Updated weights for policy 0, policy_version 195982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:23,483][626795] Updated weights for policy 0, policy_version 195992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:23,975][24592] Fps is (10 sec: 45059.6, 60 sec: 42325.3, 300 sec: 43056.6). Total num frames: 1605582848. Throughput: 0: 10820.9. Samples: 151394034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:23,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:25,300][626795] Updated weights for policy 0, policy_version 196002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:27,015][626795] Updated weights for policy 0, policy_version 196012 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:28,722][626795] Updated weights for policy 0, policy_version 196022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:28,977][24592] Fps is (10 sec: 46688.1, 60 sec: 42324.4, 300 sec: 43098.1). Total num frames: 1605820416. Throughput: 0: 10805.4. Samples: 151429128. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:28,979][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:30,610][626795] Updated weights for policy 0, policy_version 196032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:32,264][626795] Updated weights for policy 0, policy_version 196042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:33,976][24592] Fps is (10 sec: 46692.1, 60 sec: 42188.5, 300 sec: 43098.2). Total num frames: 1606049792. Throughput: 0: 10760.1. Samples: 151498800. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:33,979][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:34,036][626795] Updated weights for policy 0, policy_version 196052 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:35,832][626795] Updated weights for policy 0, policy_version 196062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:37,596][626795] Updated weights for policy 0, policy_version 196072 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:38,976][24592] Fps is (10 sec: 45879.9, 60 sec: 43963.5, 300 sec: 43070.4). Total num frames: 1606279168. Throughput: 0: 10745.4. Samples: 151568028. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:38,976][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:39,368][626795] Updated weights for policy 0, policy_version 196082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:41,205][626795] Updated weights for policy 0, policy_version 196092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:42,850][626795] Updated weights for policy 0, policy_version 196102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:43,975][24592] Fps is (10 sec: 46696.6, 60 sec: 43963.7, 300 sec: 43042.7). Total num frames: 1606516736. Throughput: 0: 10721.1. Samples: 151602972. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:43,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:44,615][626795] Updated weights for policy 0, policy_version 196112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:48,759][626795] Updated weights for policy 0, policy_version 196122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:48,976][24592] Fps is (10 sec: 36042.8, 60 sec: 42188.4, 300 sec: 42681.6). Total num frames: 1606639616. Throughput: 0: 10137.9. Samples: 151648866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:48,979][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:50,497][626795] Updated weights for policy 0, policy_version 196132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:52,273][626795] Updated weights for policy 0, policy_version 196142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,806][626772] Signal inference workers to stop experience collection... (2100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,807][626772] Signal inference workers to resume experience collection... (2100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,819][626795] InferenceWorker_p0-w0: stopping experience collection (2100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,821][626795] InferenceWorker_p0-w0: resuming experience collection (2100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,975][24592] Fps is (10 sec: 35225.7, 60 sec: 42052.3, 300 sec: 42653.9). Total num frames: 1606868992. Throughput: 0: 10019.6. Samples: 151715358. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:53,976][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:54,039][626795] Updated weights for policy 0, policy_version 196152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:55,864][626795] Updated weights for policy 0, policy_version 196162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:57,541][626795] Updated weights for policy 0, policy_version 196172 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:58,975][24592] Fps is (10 sec: 46698.7, 60 sec: 42052.3, 300 sec: 43143.8). Total num frames: 1607106560. Throughput: 0: 10510.2. Samples: 151750410. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:16:58,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:16:59,362][626795] Updated weights for policy 0, policy_version 196182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:00,965][626795] Updated weights for policy 0, policy_version 196192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:02,671][626795] Updated weights for policy 0, policy_version 196202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:03,975][24592] Fps is (10 sec: 47513.3, 60 sec: 41915.7, 300 sec: 43181.6). Total num frames: 1607344128. Throughput: 0: 11040.7. Samples: 151821456. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:03,977][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000196209_1607344128.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:04,035][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000194951_1597038592.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:04,518][626795] Updated weights for policy 0, policy_version 196212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:06,279][626795] Updated weights for policy 0, policy_version 196222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:07,851][626795] Updated weights for policy 0, policy_version 196232 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:08,975][24592] Fps is (10 sec: 47513.7, 60 sec: 41915.8, 300 sec: 43209.5). Total num frames: 1607581696. Throughput: 0: 11077.1. Samples: 151892502. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:08,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:09,612][626795] Updated weights for policy 0, policy_version 196242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:11,327][626795] Updated weights for policy 0, policy_version 196252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:13,151][626795] Updated weights for policy 0, policy_version 196262 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44783.5, 300 sec: 43209.3). Total num frames: 1607819264. Throughput: 0: 11090.1. Samples: 151928166. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:13,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:14,820][626795] Updated weights for policy 0, policy_version 196272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:16,591][626795] Updated weights for policy 0, policy_version 196282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:18,276][626795] Updated weights for policy 0, policy_version 196292 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:18,975][24592] Fps is (10 sec: 47512.8, 60 sec: 45055.9, 300 sec: 43209.3). Total num frames: 1608056832. Throughput: 0: 11113.3. Samples: 151998894. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:18,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:23,160][626795] Updated weights for policy 0, policy_version 196302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:23,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1608138752. Throughput: 0: 10328.2. Samples: 152032794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:23,977][24592] Avg episode reward: [(0, '4.309')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:25,020][626795] Updated weights for policy 0, policy_version 196312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:26,797][626795] Updated weights for policy 0, policy_version 196322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:28,696][626795] Updated weights for policy 0, policy_version 196332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:28,975][24592] Fps is (10 sec: 30311.0, 60 sec: 42326.3, 300 sec: 42598.4). Total num frames: 1608359936. Throughput: 0: 10305.5. Samples: 152066718. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:28,977][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:30,417][626795] Updated weights for policy 0, policy_version 196342 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:32,029][626795] Updated weights for policy 0, policy_version 196352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:33,904][626795] Updated weights for policy 0, policy_version 196362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:33,975][24592] Fps is (10 sec: 45875.3, 60 sec: 42462.2, 300 sec: 43074.8). Total num frames: 1608597504. Throughput: 0: 10812.2. Samples: 152135406. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:33,976][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:35,657][626795] Updated weights for policy 0, policy_version 196372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:37,265][626795] Updated weights for policy 0, policy_version 196382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:38,975][24592] Fps is (10 sec: 47513.1, 60 sec: 42598.6, 300 sec: 43153.8). Total num frames: 1608835072. Throughput: 0: 10917.2. Samples: 152206632. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:38,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:39,046][626795] Updated weights for policy 0, policy_version 196392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:40,859][626795] Updated weights for policy 0, policy_version 196402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:42,599][626795] Updated weights for policy 0, policy_version 196412 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:43,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42461.9, 300 sec: 43181.7). Total num frames: 1609064448. Throughput: 0: 10912.0. Samples: 152241450. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:43,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:44,284][626795] Updated weights for policy 0, policy_version 196422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:46,029][626795] Updated weights for policy 0, policy_version 196432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:47,891][626795] Updated weights for policy 0, policy_version 196442 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:48,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44374.0, 300 sec: 43209.3). Total num frames: 1609302016. Throughput: 0: 10896.0. Samples: 152311776. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:48,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:49,523][626795] Updated weights for policy 0, policy_version 196452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:51,338][626795] Updated weights for policy 0, policy_version 196462 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:53,180][626795] Updated weights for policy 0, policy_version 196472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:53,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44509.9, 300 sec: 43181.6). Total num frames: 1609539584. Throughput: 0: 10871.1. Samples: 152381700. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:53,976][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:54,904][626795] Updated weights for policy 0, policy_version 196482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:58,975][24592] Fps is (10 sec: 34406.4, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1609646080. Throughput: 0: 10554.1. Samples: 152403102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:17:58,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:17:59,261][626795] Updated weights for policy 0, policy_version 196492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:01,055][626795] Updated weights for policy 0, policy_version 196502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:02,824][626795] Updated weights for policy 0, policy_version 196512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:03,975][24592] Fps is (10 sec: 33587.1, 60 sec: 42188.8, 300 sec: 42737.2). Total num frames: 1609875456. Throughput: 0: 10142.3. Samples: 152455296. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:03,977][24592] Avg episode reward: [(0, '4.945')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:04,593][626795] Updated weights for policy 0, policy_version 196522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:06,254][626795] Updated weights for policy 0, policy_version 196532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:08,036][626795] Updated weights for policy 0, policy_version 196542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:08,976][24592] Fps is (10 sec: 46691.5, 60 sec: 42188.3, 300 sec: 42737.2). Total num frames: 1610113024. Throughput: 0: 10968.9. Samples: 152526402. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:08,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:09,707][626795] Updated weights for policy 0, policy_version 196552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:11,424][626795] Updated weights for policy 0, policy_version 196562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:13,235][626795] Updated weights for policy 0, policy_version 196572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:13,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42188.8, 300 sec: 43181.6). Total num frames: 1610350592. Throughput: 0: 11007.7. Samples: 152562066. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:13,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:14,894][626795] Updated weights for policy 0, policy_version 196582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:16,624][626795] Updated weights for policy 0, policy_version 196592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:18,455][626795] Updated weights for policy 0, policy_version 196602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:18,975][24592] Fps is (10 sec: 47516.4, 60 sec: 42188.9, 300 sec: 43153.8). Total num frames: 1610588160. Throughput: 0: 11056.8. Samples: 152632962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:18,977][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:20,138][626795] Updated weights for policy 0, policy_version 196612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:21,811][626795] Updated weights for policy 0, policy_version 196622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:23,692][626795] Updated weights for policy 0, policy_version 196632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:23,975][24592] Fps is (10 sec: 46694.7, 60 sec: 44646.4, 300 sec: 43126.0). Total num frames: 1610817536. Throughput: 0: 11030.0. Samples: 152702982. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:23,976][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:25,372][626795] Updated weights for policy 0, policy_version 196642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:27,098][626795] Updated weights for policy 0, policy_version 196652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:28,872][626795] Updated weights for policy 0, policy_version 196662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:28,975][24592] Fps is (10 sec: 46694.5, 60 sec: 44919.4, 300 sec: 43126.0). Total num frames: 1611055104. Throughput: 0: 11042.1. Samples: 152738346. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:28,976][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:33,515][626795] Updated weights for policy 0, policy_version 196672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:33,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42598.4, 300 sec: 42655.2). Total num frames: 1611153408. Throughput: 0: 10446.1. Samples: 152781852. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:33,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:35,342][626795] Updated weights for policy 0, policy_version 196682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:37,138][626795] Updated weights for policy 0, policy_version 196692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:38,771][626795] Updated weights for policy 0, policy_version 196702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:38,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42461.9, 300 sec: 42598.4). Total num frames: 1611382784. Throughput: 0: 10281.2. Samples: 152844354. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:38,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:40,583][626795] Updated weights for policy 0, policy_version 196712 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:42,295][626795] Updated weights for policy 0, policy_version 196722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:43,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42598.4, 300 sec: 42626.2). Total num frames: 1611620352. Throughput: 0: 10588.9. Samples: 152879604. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:43,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:43,993][626795] Updated weights for policy 0, policy_version 196732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:45,744][626795] Updated weights for policy 0, policy_version 196742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:47,428][626795] Updated weights for policy 0, policy_version 196752 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:48,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.4, 300 sec: 43181.6). Total num frames: 1611857920. Throughput: 0: 11031.5. Samples: 152951712. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:48,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:49,126][626795] Updated weights for policy 0, policy_version 196762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:50,903][626795] Updated weights for policy 0, policy_version 196772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:52,629][626795] Updated weights for policy 0, policy_version 196782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.4, 300 sec: 43237.2). Total num frames: 1612095488. Throughput: 0: 11014.5. Samples: 153022050. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:53,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:54,471][626795] Updated weights for policy 0, policy_version 196792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:56,102][626795] Updated weights for policy 0, policy_version 196802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:57,845][626795] Updated weights for policy 0, policy_version 196812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:58,975][24592] Fps is (10 sec: 47513.7, 60 sec: 44782.9, 300 sec: 43320.4). Total num frames: 1612333056. Throughput: 0: 11017.0. Samples: 153057828. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:18:58,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:18:59,536][626795] Updated weights for policy 0, policy_version 196822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:01,222][626795] Updated weights for policy 0, policy_version 196832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:02,964][626795] Updated weights for policy 0, policy_version 196842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:03,976][24592] Fps is (10 sec: 47512.5, 60 sec: 44919.3, 300 sec: 43376.0). Total num frames: 1612570624. Throughput: 0: 11022.2. Samples: 153128964. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:03,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000196847_1612570624.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000195573_1602134016.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:04,845][626795] Updated weights for policy 0, policy_version 196852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:08,976][24592] Fps is (10 sec: 31948.4, 60 sec: 42325.7, 300 sec: 42848.3). Total num frames: 1612652544. Throughput: 0: 10195.7. Samples: 153161790. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:08,979][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:09,781][626795] Updated weights for policy 0, policy_version 196862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:11,484][626795] Updated weights for policy 0, policy_version 196872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:13,436][626795] Updated weights for policy 0, policy_version 196882 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:13,975][24592] Fps is (10 sec: 30311.2, 60 sec: 42052.3, 300 sec: 42765.0). Total num frames: 1612873728. Throughput: 0: 10163.5. Samples: 153195702. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:13,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:15,061][626795] Updated weights for policy 0, policy_version 196892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:16,873][626795] Updated weights for policy 0, policy_version 196902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:18,530][626795] Updated weights for policy 0, policy_version 196912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:18,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42188.7, 300 sec: 42765.0). Total num frames: 1613119488. Throughput: 0: 10747.4. Samples: 153265488. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:18,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:20,255][626795] Updated weights for policy 0, policy_version 196922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:22,031][626795] Updated weights for policy 0, policy_version 196932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:23,700][626795] Updated weights for policy 0, policy_version 196942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:23,975][24592] Fps is (10 sec: 48332.5, 60 sec: 42325.3, 300 sec: 43126.0). Total num frames: 1613357056. Throughput: 0: 10946.4. Samples: 153336942. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:23,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:25,415][626795] Updated weights for policy 0, policy_version 196952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:27,115][626795] Updated weights for policy 0, policy_version 196962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:28,765][626795] Updated weights for policy 0, policy_version 196972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:28,975][24592] Fps is (10 sec: 47514.3, 60 sec: 42325.3, 300 sec: 43126.0). Total num frames: 1613594624. Throughput: 0: 10965.6. Samples: 153373056. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:28,976][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:30,614][626795] Updated weights for policy 0, policy_version 196982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:32,283][626795] Updated weights for policy 0, policy_version 196992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:33,976][24592] Fps is (10 sec: 47512.0, 60 sec: 44646.1, 300 sec: 43126.0). Total num frames: 1613832192. Throughput: 0: 10955.8. Samples: 153444726. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:33,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:34,010][626795] Updated weights for policy 0, policy_version 197002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:35,694][626795] Updated weights for policy 0, policy_version 197012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:37,427][626795] Updated weights for policy 0, policy_version 197022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:38,975][24592] Fps is (10 sec: 48332.5, 60 sec: 44919.4, 300 sec: 43126.0). Total num frames: 1614077952. Throughput: 0: 10990.3. Samples: 153516612. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:38,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:39,107][626795] Updated weights for policy 0, policy_version 197032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:43,976][24592] Fps is (10 sec: 32768.5, 60 sec: 42325.2, 300 sec: 42598.4). Total num frames: 1614159872. Throughput: 0: 10195.1. Samples: 153516612. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:43,978][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:44,024][626795] Updated weights for policy 0, policy_version 197042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:45,918][626795] Updated weights for policy 0, policy_version 197052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:47,526][626795] Updated weights for policy 0, policy_version 197062 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:48,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42188.8, 300 sec: 42570.6). Total num frames: 1614389248. Throughput: 0: 10122.4. Samples: 153584472. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:48,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:49,255][626795] Updated weights for policy 0, policy_version 197072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:51,029][626795] Updated weights for policy 0, policy_version 197082 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:52,700][626795] Updated weights for policy 0, policy_version 197092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:53,975][24592] Fps is (10 sec: 47514.8, 60 sec: 42325.4, 300 sec: 42626.2). Total num frames: 1614635008. Throughput: 0: 10991.4. Samples: 153656400. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:53,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:54,438][626795] Updated weights for policy 0, policy_version 197102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:56,243][626795] Updated weights for policy 0, policy_version 197112 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:57,815][626795] Updated weights for policy 0, policy_version 197122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:58,975][24592] Fps is (10 sec: 48333.2, 60 sec: 42325.3, 300 sec: 43181.6). Total num frames: 1614872576. Throughput: 0: 11024.4. Samples: 153691800. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:19:58,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:19:59,545][626795] Updated weights for policy 0, policy_version 197132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:01,328][626795] Updated weights for policy 0, policy_version 197142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:03,058][626795] Updated weights for policy 0, policy_version 197152 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:03,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.5, 300 sec: 43264.9). Total num frames: 1615110144. Throughput: 0: 11066.3. Samples: 153763470. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:03,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:04,721][626795] Updated weights for policy 0, policy_version 197162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:06,487][626795] Updated weights for policy 0, policy_version 197172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:08,175][626795] Updated weights for policy 0, policy_version 197182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45056.1, 300 sec: 43348.2). Total num frames: 1615355904. Throughput: 0: 11076.5. Samples: 153835386. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:08,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:09,813][626795] Updated weights for policy 0, policy_version 197192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:11,542][626795] Updated weights for policy 0, policy_version 197202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:13,341][626795] Updated weights for policy 0, policy_version 197212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:13,976][24592] Fps is (10 sec: 48332.1, 60 sec: 45329.0, 300 sec: 43348.2). Total num frames: 1615593472. Throughput: 0: 11062.0. Samples: 153870846. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:13,978][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:15,090][626795] Updated weights for policy 0, policy_version 197222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:19,430][24592] Fps is (10 sec: 32126.1, 60 sec: 42549.1, 300 sec: 42810.1). Total num frames: 1615691776. Throughput: 0: 10156.4. Samples: 153906378. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:19,431][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:20,164][626795] Updated weights for policy 0, policy_version 197232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:21,906][626795] Updated weights for policy 0, policy_version 197242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:23,581][626795] Updated weights for policy 0, policy_version 197252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:23,975][24592] Fps is (10 sec: 30310.4, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1615896576. Throughput: 0: 10146.8. Samples: 153973218. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:23,976][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:25,335][626795] Updated weights for policy 0, policy_version 197262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:27,043][626795] Updated weights for policy 0, policy_version 197272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:28,683][626795] Updated weights for policy 0, policy_version 197282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:28,975][24592] Fps is (10 sec: 46344.5, 60 sec: 42325.3, 300 sec: 42765.0). Total num frames: 1616134144. Throughput: 0: 10938.9. Samples: 154008858. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:28,976][24592] Avg episode reward: [(0, '5.020')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:30,472][626795] Updated weights for policy 0, policy_version 197292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:32,204][626795] Updated weights for policy 0, policy_version 197302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:33,812][626795] Updated weights for policy 0, policy_version 197312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:33,975][24592] Fps is (10 sec: 48333.6, 60 sec: 42462.2, 300 sec: 43181.6). Total num frames: 1616379904. Throughput: 0: 11031.0. Samples: 154080864. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:33,976][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:35,540][626795] Updated weights for policy 0, policy_version 197322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:37,317][626795] Updated weights for policy 0, policy_version 197332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:38,916][626795] Updated weights for policy 0, policy_version 197342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:38,975][24592] Fps is (10 sec: 49152.3, 60 sec: 42462.0, 300 sec: 43209.3). Total num frames: 1616625664. Throughput: 0: 11040.9. Samples: 154153242. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:38,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:40,681][626795] Updated weights for policy 0, policy_version 197352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:42,321][626795] Updated weights for policy 0, policy_version 197362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:43,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45056.2, 300 sec: 43237.2). Total num frames: 1616863232. Throughput: 0: 11058.5. Samples: 154189434. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:43,978][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:44,113][626795] Updated weights for policy 0, policy_version 197372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:45,769][626795] Updated weights for policy 0, policy_version 197382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:47,493][626795] Updated weights for policy 0, policy_version 197392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:48,975][24592] Fps is (10 sec: 46694.0, 60 sec: 45056.0, 300 sec: 43209.3). Total num frames: 1617092608. Throughput: 0: 11052.0. Samples: 154260810. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:48,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:49,319][626795] Updated weights for policy 0, policy_version 197402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:51,283][626795] Updated weights for policy 0, policy_version 197412 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:54,938][24592] Fps is (10 sec: 31384.2, 60 sec: 42194.3, 300 sec: 42653.6). Total num frames: 1617207296. Throughput: 0: 9964.0. Samples: 154293360. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:54,939][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:56,391][626795] Updated weights for policy 0, policy_version 197422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:58,109][626795] Updated weights for policy 0, policy_version 197432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:58,976][24592] Fps is (10 sec: 31127.5, 60 sec: 42188.3, 300 sec: 42626.1). Total num frames: 1617403904. Throughput: 0: 10081.5. Samples: 154324518. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:20:58,977][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:20:59,832][626795] Updated weights for policy 0, policy_version 197442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:01,455][626795] Updated weights for policy 0, policy_version 197452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:03,274][626795] Updated weights for policy 0, policy_version 197462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:03,976][24592] Fps is (10 sec: 48039.5, 60 sec: 42188.2, 300 sec: 42626.1). Total num frames: 1617641472. Throughput: 0: 10997.6. Samples: 154396278. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:03,978][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000197466_1617641472.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000196209_1607344128.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:04,920][626795] Updated weights for policy 0, policy_version 197472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:06,807][626795] Updated weights for policy 0, policy_version 197482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:08,312][626795] Updated weights for policy 0, policy_version 197492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:08,975][24592] Fps is (10 sec: 47516.6, 60 sec: 42052.2, 300 sec: 43209.4). Total num frames: 1617879040. Throughput: 0: 10977.0. Samples: 154467180. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:08,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:10,177][626795] Updated weights for policy 0, policy_version 197502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:11,883][626795] Updated weights for policy 0, policy_version 197512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:13,566][626795] Updated weights for policy 0, policy_version 197522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:13,976][24592] Fps is (10 sec: 47514.9, 60 sec: 42052.0, 300 sec: 43264.8). Total num frames: 1618116608. Throughput: 0: 10973.1. Samples: 154502652. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:13,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:15,236][626795] Updated weights for policy 0, policy_version 197532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:17,006][626795] Updated weights for policy 0, policy_version 197542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:18,686][626795] Updated weights for policy 0, policy_version 197552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:18,975][24592] Fps is (10 sec: 47514.0, 60 sec: 44712.3, 300 sec: 43292.6). Total num frames: 1618354176. Throughput: 0: 10968.0. Samples: 154574424. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:18,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:20,483][626795] Updated weights for policy 0, policy_version 197562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:22,138][626795] Updated weights for policy 0, policy_version 197572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:23,863][626795] Updated weights for policy 0, policy_version 197582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:23,975][24592] Fps is (10 sec: 47516.2, 60 sec: 44919.5, 300 sec: 43292.8). Total num frames: 1618591744. Throughput: 0: 10950.8. Samples: 154646028. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:23,976][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:25,706][626795] Updated weights for policy 0, policy_version 197592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:30,459][24592] Fps is (10 sec: 32815.6, 60 sec: 42236.9, 300 sec: 42772.2). Total num frames: 1618731008. Throughput: 0: 10551.3. Samples: 154679892. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:30,460][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:30,769][626795] Updated weights for policy 0, policy_version 197602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:32,508][626795] Updated weights for policy 0, policy_version 197612 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:33,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42052.3, 300 sec: 42792.8). Total num frames: 1618903040. Throughput: 0: 10027.6. Samples: 154712052. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:33,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:34,162][626795] Updated weights for policy 0, policy_version 197622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:35,935][626795] Updated weights for policy 0, policy_version 197632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:37,527][626795] Updated weights for policy 0, policy_version 197642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:38,975][24592] Fps is (10 sec: 48093.6, 60 sec: 41915.7, 300 sec: 42792.8). Total num frames: 1619140608. Throughput: 0: 11142.3. Samples: 154784034. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:38,976][24592] Avg episode reward: [(0, '5.066')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:39,370][626795] Updated weights for policy 0, policy_version 197652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:41,065][626795] Updated weights for policy 0, policy_version 197662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:42,736][626795] Updated weights for policy 0, policy_version 197672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42052.3, 300 sec: 43209.5). Total num frames: 1619386368. Throughput: 0: 11007.4. Samples: 154819842. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:43,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:44,478][626795] Updated weights for policy 0, policy_version 197682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:46,220][626795] Updated weights for policy 0, policy_version 197692 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:47,826][626795] Updated weights for policy 0, policy_version 197702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:48,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42188.8, 300 sec: 43237.1). Total num frames: 1619623936. Throughput: 0: 11016.2. Samples: 154891998. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:48,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:49,518][626795] Updated weights for policy 0, policy_version 197712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:51,272][626795] Updated weights for policy 0, policy_version 197722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:53,094][626795] Updated weights for policy 0, policy_version 197732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:53,975][24592] Fps is (10 sec: 47513.2, 60 sec: 44958.3, 300 sec: 43237.1). Total num frames: 1619861504. Throughput: 0: 11017.9. Samples: 154962984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:53,977][24592] Avg episode reward: [(0, '4.902')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:54,716][626795] Updated weights for policy 0, policy_version 197742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:56,433][626795] Updated weights for policy 0, policy_version 197752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:58,097][626795] Updated weights for policy 0, policy_version 197762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:58,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44919.9, 300 sec: 43237.1). Total num frames: 1620099072. Throughput: 0: 11049.2. Samples: 154999860. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:21:58,976][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:21:59,876][626795] Updated weights for policy 0, policy_version 197772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:01,742][626795] Updated weights for policy 0, policy_version 197782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:05,989][24592] Fps is (10 sec: 34094.4, 60 sec: 42404.6, 300 sec: 42723.3). Total num frames: 1620271104. Throughput: 0: 10515.2. Samples: 155068782. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:05,990][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:06,766][626795] Updated weights for policy 0, policy_version 197792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:08,550][626795] Updated weights for policy 0, policy_version 197802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:08,975][24592] Fps is (10 sec: 31129.8, 60 sec: 42188.9, 300 sec: 42681.7). Total num frames: 1620410368. Throughput: 0: 10115.6. Samples: 155101230. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:08,977][24592] Avg episode reward: [(0, '4.992')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:10,240][626795] Updated weights for policy 0, policy_version 197812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:11,880][626795] Updated weights for policy 0, policy_version 197822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:13,649][626795] Updated weights for policy 0, policy_version 197832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:13,976][24592] Fps is (10 sec: 47184.1, 60 sec: 42189.1, 300 sec: 42681.7). Total num frames: 1620647936. Throughput: 0: 10497.0. Samples: 155136690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:13,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:15,444][626795] Updated weights for policy 0, policy_version 197842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:16,964][626795] Updated weights for policy 0, policy_version 197852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:18,778][626795] Updated weights for policy 0, policy_version 197862 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:18,975][24592] Fps is (10 sec: 48332.2, 60 sec: 42325.2, 300 sec: 43237.1). Total num frames: 1620893696. Throughput: 0: 11030.2. Samples: 155208414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:18,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:20,487][626795] Updated weights for policy 0, policy_version 197872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:22,275][626795] Updated weights for policy 0, policy_version 197882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:23,916][626795] Updated weights for policy 0, policy_version 197892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:23,975][24592] Fps is (10 sec: 48333.9, 60 sec: 42325.4, 300 sec: 43292.6). Total num frames: 1621131264. Throughput: 0: 11025.3. Samples: 155280174. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:23,976][24592] Avg episode reward: [(0, '4.922')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:25,690][626795] Updated weights for policy 0, policy_version 197902 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:27,252][626795] Updated weights for policy 0, policy_version 197912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:28,975][24592] Fps is (10 sec: 47514.3, 60 sec: 45078.2, 300 sec: 43292.6). Total num frames: 1621368832. Throughput: 0: 11032.7. Samples: 155316312. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:28,977][24592] Avg episode reward: [(0, '4.993')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:29,102][626795] Updated weights for policy 0, policy_version 197922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:30,791][626795] Updated weights for policy 0, policy_version 197932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:32,454][626795] Updated weights for policy 0, policy_version 197942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:33,976][24592] Fps is (10 sec: 48331.6, 60 sec: 45192.4, 300 sec: 43320.4). Total num frames: 1621614592. Throughput: 0: 11031.7. Samples: 155388426. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:33,979][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:34,175][626795] Updated weights for policy 0, policy_version 197952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:35,978][626795] Updated weights for policy 0, policy_version 197962 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:37,781][626795] Updated weights for policy 0, policy_version 197972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:41,532][24592] Fps is (10 sec: 33925.3, 60 sec: 42429.0, 300 sec: 42783.0). Total num frames: 1621794816. Throughput: 0: 9679.6. Samples: 155423310. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:41,534][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:42,936][626795] Updated weights for policy 0, policy_version 197982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:43,976][24592] Fps is (10 sec: 29489.3, 60 sec: 42051.6, 300 sec: 42737.1). Total num frames: 1621909504. Throughput: 0: 10065.3. Samples: 155452806. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:43,978][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:44,725][626795] Updated weights for policy 0, policy_version 197992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:46,467][626795] Updated weights for policy 0, policy_version 198002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:48,143][626795] Updated weights for policy 0, policy_version 198012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:48,975][24592] Fps is (10 sec: 47324.1, 60 sec: 42052.3, 300 sec: 42737.3). Total num frames: 1622147072. Throughput: 0: 10577.9. Samples: 155523486. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:48,977][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:49,791][626795] Updated weights for policy 0, policy_version 198022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:51,537][626795] Updated weights for policy 0, policy_version 198032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:53,258][626795] Updated weights for policy 0, policy_version 198042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:53,975][24592] Fps is (10 sec: 48337.2, 60 sec: 42188.9, 300 sec: 43209.3). Total num frames: 1622392832. Throughput: 0: 10977.2. Samples: 155595204. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:53,976][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:55,071][626795] Updated weights for policy 0, policy_version 198052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:56,662][626795] Updated weights for policy 0, policy_version 198062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:22:58,435][626795] Updated weights for policy 0, policy_version 198072 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:58,976][24592] Fps is (10 sec: 48331.2, 60 sec: 42188.6, 300 sec: 43237.1). Total num frames: 1622630400. Throughput: 0: 10990.2. Samples: 155631252. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:22:58,978][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:00,105][626795] Updated weights for policy 0, policy_version 198082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:01,834][626795] Updated weights for policy 0, policy_version 198092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:03,495][626795] Updated weights for policy 0, policy_version 198102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:03,977][24592] Fps is (10 sec: 47507.1, 60 sec: 44783.1, 300 sec: 43237.0). Total num frames: 1622867968. Throughput: 0: 10995.7. Samples: 155703234. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:03,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000198104_1622867968.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000196847_1612570624.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:05,262][626795] Updated weights for policy 0, policy_version 198112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:06,923][626795] Updated weights for policy 0, policy_version 198122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:08,686][626795] Updated weights for policy 0, policy_version 198132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:08,976][24592] Fps is (10 sec: 47513.0, 60 sec: 44919.1, 300 sec: 43237.0). Total num frames: 1623105536. Throughput: 0: 10995.6. Samples: 155774982. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:08,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:10,324][626795] Updated weights for policy 0, policy_version 198142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:12,175][626795] Updated weights for policy 0, policy_version 198152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:16,970][24592] Fps is (10 sec: 35307.8, 60 sec: 42524.4, 300 sec: 42747.7). Total num frames: 1623326720. Throughput: 0: 10279.8. Samples: 155809686. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:16,971][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:17,189][626795] Updated weights for policy 0, policy_version 198162 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:18,953][626795] Updated weights for policy 0, policy_version 198172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:18,975][24592] Fps is (10 sec: 31950.1, 60 sec: 42188.8, 300 sec: 42737.2). Total num frames: 1623425024. Throughput: 0: 10068.2. Samples: 155841492. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:18,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:20,710][626795] Updated weights for policy 0, policy_version 198182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:22,243][626795] Updated weights for policy 0, policy_version 198192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:23,975][24592] Fps is (10 sec: 47942.1, 60 sec: 42188.7, 300 sec: 42737.2). Total num frames: 1623662592. Throughput: 0: 11555.7. Samples: 155913774. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:23,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:24,048][626795] Updated weights for policy 0, policy_version 198202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:25,840][626795] Updated weights for policy 0, policy_version 198212 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:27,539][626795] Updated weights for policy 0, policy_version 198222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:28,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42188.7, 300 sec: 43209.3). Total num frames: 1623900160. Throughput: 0: 11038.3. Samples: 155949522. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:28,977][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:29,156][626795] Updated weights for policy 0, policy_version 198232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:30,911][626795] Updated weights for policy 0, policy_version 198242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:32,611][626795] Updated weights for policy 0, policy_version 198252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:33,975][24592] Fps is (10 sec: 48333.3, 60 sec: 42189.0, 300 sec: 43264.9). Total num frames: 1624145920. Throughput: 0: 11068.9. Samples: 156021588. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:33,976][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:34,234][626795] Updated weights for policy 0, policy_version 198262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:35,987][626795] Updated weights for policy 0, policy_version 198272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:37,689][626795] Updated weights for policy 0, policy_version 198282 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:38,975][24592] Fps is (10 sec: 48333.3, 60 sec: 45064.7, 300 sec: 43264.9). Total num frames: 1624383488. Throughput: 0: 11068.5. Samples: 156093288. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:38,978][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:39,415][626795] Updated weights for policy 0, policy_version 198292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:41,161][626795] Updated weights for policy 0, policy_version 198302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:42,822][626795] Updated weights for policy 0, policy_version 198312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:43,975][24592] Fps is (10 sec: 48332.4, 60 sec: 45329.7, 300 sec: 43292.6). Total num frames: 1624629248. Throughput: 0: 11073.3. Samples: 156129546. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:43,976][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:44,409][626795] Updated weights for policy 0, policy_version 198322 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:46,257][626795] Updated weights for policy 0, policy_version 198332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:48,075][626795] Updated weights for policy 0, policy_version 198342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:48,975][24592] Fps is (10 sec: 46694.2, 60 sec: 45056.0, 300 sec: 43237.1). Total num frames: 1624850432. Throughput: 0: 11033.7. Samples: 156199734. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:48,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:53,203][626795] Updated weights for policy 0, policy_version 198352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:53,975][24592] Fps is (10 sec: 30310.1, 60 sec: 42325.2, 300 sec: 42709.5). Total num frames: 1624932352. Throughput: 0: 10117.5. Samples: 156230268. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:53,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:54,971][626795] Updated weights for policy 0, policy_version 198362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:56,770][626795] Updated weights for policy 0, policy_version 198372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:23:58,426][626795] Updated weights for policy 0, policy_version 198382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:58,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42325.6, 300 sec: 42709.5). Total num frames: 1625169920. Throughput: 0: 10850.0. Samples: 156265446. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:23:58,976][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:00,127][626795] Updated weights for policy 0, policy_version 198392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:01,845][626795] Updated weights for policy 0, policy_version 198402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:03,473][626795] Updated weights for policy 0, policy_version 198412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:03,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42326.2, 300 sec: 43237.1). Total num frames: 1625407488. Throughput: 0: 11033.9. Samples: 156338016. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:03,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:05,190][626795] Updated weights for policy 0, policy_version 198422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:06,927][626795] Updated weights for policy 0, policy_version 198432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,526][626772] Signal inference workers to stop experience collection... (2150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,526][626772] Signal inference workers to resume experience collection... (2150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,536][626795] InferenceWorker_p0-w0: stopping experience collection (2150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,541][626795] InferenceWorker_p0-w0: resuming experience collection (2150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:08,563][626795] Updated weights for policy 0, policy_version 198442 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42325.6, 300 sec: 43292.6). Total num frames: 1625645056. Throughput: 0: 11032.8. Samples: 156410250. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:08,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:10,299][626795] Updated weights for policy 0, policy_version 198452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:11,982][626795] Updated weights for policy 0, policy_version 198462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:13,673][626795] Updated weights for policy 0, policy_version 198472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:13,975][24592] Fps is (10 sec: 48333.1, 60 sec: 44979.6, 300 sec: 43292.7). Total num frames: 1625890816. Throughput: 0: 11040.1. Samples: 156446328. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:13,976][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:15,407][626795] Updated weights for policy 0, policy_version 198482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:17,147][626795] Updated weights for policy 0, policy_version 198492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:18,821][626795] Updated weights for policy 0, policy_version 198502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:18,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45056.0, 300 sec: 43292.6). Total num frames: 1626128384. Throughput: 0: 11037.6. Samples: 156518280. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:18,976][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:20,497][626795] Updated weights for policy 0, policy_version 198512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:22,408][626795] Updated weights for policy 0, policy_version 198522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:23,975][24592] Fps is (10 sec: 46694.6, 60 sec: 44919.5, 300 sec: 43264.9). Total num frames: 1626357760. Throughput: 0: 10998.7. Samples: 156588228. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:23,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:24,205][626795] Updated weights for policy 0, policy_version 198532 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:28,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42461.9, 300 sec: 42765.1). Total num frames: 1626447872. Throughput: 0: 10466.1. Samples: 156600522. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:28,976][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:29,104][626795] Updated weights for policy 0, policy_version 198542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:30,902][626795] Updated weights for policy 0, policy_version 198552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:32,670][626795] Updated weights for policy 0, policy_version 198562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:33,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42188.8, 300 sec: 42709.5). Total num frames: 1626677248. Throughput: 0: 10147.5. Samples: 156656370. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:33,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:34,320][626795] Updated weights for policy 0, policy_version 198572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:36,048][626795] Updated weights for policy 0, policy_version 198582 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:37,680][626795] Updated weights for policy 0, policy_version 198592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:38,976][24592] Fps is (10 sec: 47509.1, 60 sec: 42324.6, 300 sec: 43264.8). Total num frames: 1626923008. Throughput: 0: 11070.2. Samples: 156728436. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:38,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:39,384][626795] Updated weights for policy 0, policy_version 198602 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:41,160][626795] Updated weights for policy 0, policy_version 198612 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:42,774][626795] Updated weights for policy 0, policy_version 198622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:43,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42188.8, 300 sec: 43292.6). Total num frames: 1627160576. Throughput: 0: 11084.0. Samples: 156764226. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:43,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:44,544][626795] Updated weights for policy 0, policy_version 198632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:46,227][626795] Updated weights for policy 0, policy_version 198642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:48,005][626795] Updated weights for policy 0, policy_version 198652 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:48,975][24592] Fps is (10 sec: 47518.2, 60 sec: 42461.9, 300 sec: 43264.9). Total num frames: 1627398144. Throughput: 0: 11069.2. Samples: 156836130. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:48,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:49,651][626795] Updated weights for policy 0, policy_version 198662 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:51,290][626795] Updated weights for policy 0, policy_version 198672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:53,086][626795] Updated weights for policy 0, policy_version 198682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:53,975][24592] Fps is (10 sec: 47513.3, 60 sec: 45056.1, 300 sec: 43264.9). Total num frames: 1627635712. Throughput: 0: 11073.5. Samples: 156908556. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:53,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:54,691][626795] Updated weights for policy 0, policy_version 198692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:56,560][626795] Updated weights for policy 0, policy_version 198702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:24:58,296][626795] Updated weights for policy 0, policy_version 198712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:58,976][24592] Fps is (10 sec: 47512.1, 60 sec: 45055.7, 300 sec: 43264.8). Total num frames: 1627873280. Throughput: 0: 11059.0. Samples: 156943986. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:24:58,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:03,308][626795] Updated weights for policy 0, policy_version 198722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:03,976][24592] Fps is (10 sec: 31948.4, 60 sec: 42461.8, 300 sec: 42709.4). Total num frames: 1627955200. Throughput: 0: 10324.2. Samples: 156982872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:03,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000198725_1627955200.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000197466_1617641472.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:05,160][626795] Updated weights for policy 0, policy_version 198732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:06,868][626795] Updated weights for policy 0, policy_version 198742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:08,581][626795] Updated weights for policy 0, policy_version 198752 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:08,975][24592] Fps is (10 sec: 31949.6, 60 sec: 42461.8, 300 sec: 42709.5). Total num frames: 1628192768. Throughput: 0: 10165.8. Samples: 157045692. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:08,977][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:10,193][626795] Updated weights for policy 0, policy_version 198762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:11,967][626795] Updated weights for policy 0, policy_version 198772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:13,640][626795] Updated weights for policy 0, policy_version 198782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:13,975][24592] Fps is (10 sec: 47514.7, 60 sec: 42325.3, 300 sec: 43248.2). Total num frames: 1628430336. Throughput: 0: 10698.0. Samples: 157081932. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:13,978][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:15,359][626795] Updated weights for policy 0, policy_version 198792 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:16,959][626795] Updated weights for policy 0, policy_version 198802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:18,730][626795] Updated weights for policy 0, policy_version 198812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:18,976][24592] Fps is (10 sec: 48330.6, 60 sec: 42461.5, 300 sec: 43320.3). Total num frames: 1628676096. Throughput: 0: 11065.7. Samples: 157154334. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:18,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:20,397][626795] Updated weights for policy 0, policy_version 198822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:22,112][626795] Updated weights for policy 0, policy_version 198832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:23,745][626795] Updated weights for policy 0, policy_version 198842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:23,976][24592] Fps is (10 sec: 49149.5, 60 sec: 42734.6, 300 sec: 43348.1). Total num frames: 1628921856. Throughput: 0: 11095.9. Samples: 157227744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:23,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:25,460][626795] Updated weights for policy 0, policy_version 198852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:27,178][626795] Updated weights for policy 0, policy_version 198862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:28,831][626795] Updated weights for policy 0, policy_version 198872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:28,976][24592] Fps is (10 sec: 48333.6, 60 sec: 45192.3, 300 sec: 43320.3). Total num frames: 1629159424. Throughput: 0: 11102.0. Samples: 157263822. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:28,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:30,455][626795] Updated weights for policy 0, policy_version 198882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:32,318][626795] Updated weights for policy 0, policy_version 198892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:33,975][24592] Fps is (10 sec: 47515.6, 60 sec: 45329.0, 300 sec: 43292.6). Total num frames: 1629396992. Throughput: 0: 11102.7. Samples: 157335750. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:33,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:34,092][626795] Updated weights for policy 0, policy_version 198902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:38,975][24592] Fps is (10 sec: 31950.0, 60 sec: 42599.1, 300 sec: 42765.0). Total num frames: 1629478912. Throughput: 0: 10210.1. Samples: 157368012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:38,979][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:39,047][626795] Updated weights for policy 0, policy_version 198912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:40,933][626795] Updated weights for policy 0, policy_version 198922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:42,658][626795] Updated weights for policy 0, policy_version 198932 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:43,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42598.4, 300 sec: 42792.8). Total num frames: 1629716480. Throughput: 0: 10163.9. Samples: 157401360. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:43,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:44,379][626795] Updated weights for policy 0, policy_version 198942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:46,036][626795] Updated weights for policy 0, policy_version 198952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:47,768][626795] Updated weights for policy 0, policy_version 198962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:48,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42598.4, 300 sec: 43350.8). Total num frames: 1629954048. Throughput: 0: 10906.3. Samples: 157473654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:48,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:49,484][626795] Updated weights for policy 0, policy_version 198972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:51,164][626795] Updated weights for policy 0, policy_version 198982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:52,872][626795] Updated weights for policy 0, policy_version 198992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42598.4, 300 sec: 43348.3). Total num frames: 1630191616. Throughput: 0: 11103.0. Samples: 157545324. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:53,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:54,643][626795] Updated weights for policy 0, policy_version 199002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:56,337][626795] Updated weights for policy 0, policy_version 199012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:57,959][626795] Updated weights for policy 0, policy_version 199022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:58,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42598.6, 300 sec: 43348.3). Total num frames: 1630429184. Throughput: 0: 11097.1. Samples: 157581300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:25:58,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:25:59,769][626795] Updated weights for policy 0, policy_version 199032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:01,487][626795] Updated weights for policy 0, policy_version 199042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:03,169][626795] Updated weights for policy 0, policy_version 199052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:03,975][24592] Fps is (10 sec: 47513.7, 60 sec: 45192.7, 300 sec: 43348.2). Total num frames: 1630666752. Throughput: 0: 11079.6. Samples: 157652910. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:03,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:04,823][626795] Updated weights for policy 0, policy_version 199062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:06,598][626795] Updated weights for policy 0, policy_version 199072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:08,286][626795] Updated weights for policy 0, policy_version 199082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:08,975][24592] Fps is (10 sec: 47513.9, 60 sec: 45192.6, 300 sec: 43348.3). Total num frames: 1630904320. Throughput: 0: 11023.1. Samples: 157723776. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:08,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:10,183][626795] Updated weights for policy 0, policy_version 199092 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:13,975][24592] Fps is (10 sec: 32767.8, 60 sec: 42734.9, 300 sec: 42848.3). Total num frames: 1630994432. Throughput: 0: 10760.4. Samples: 157748034. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:13,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:15,046][626795] Updated weights for policy 0, policy_version 199102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:16,888][626795] Updated weights for policy 0, policy_version 199112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:18,502][626795] Updated weights for policy 0, policy_version 199122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:18,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42462.2, 300 sec: 42820.6). Total num frames: 1631223808. Throughput: 0: 10139.2. Samples: 157792014. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:18,980][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:20,206][626795] Updated weights for policy 0, policy_version 199132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:21,926][626795] Updated weights for policy 0, policy_version 199142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:23,615][626795] Updated weights for policy 0, policy_version 199152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:23,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42462.2, 300 sec: 43399.8). Total num frames: 1631469568. Throughput: 0: 11033.2. Samples: 157864506. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:23,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:25,370][626795] Updated weights for policy 0, policy_version 199162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:26,995][626795] Updated weights for policy 0, policy_version 199172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:28,816][626795] Updated weights for policy 0, policy_version 199182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:28,976][24592] Fps is (10 sec: 48331.0, 60 sec: 42461.9, 300 sec: 43403.7). Total num frames: 1631707136. Throughput: 0: 11091.2. Samples: 157900470. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:28,976][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:30,431][626795] Updated weights for policy 0, policy_version 199192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:32,116][626795] Updated weights for policy 0, policy_version 199202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:33,772][626795] Updated weights for policy 0, policy_version 199212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:33,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42461.9, 300 sec: 43403.7). Total num frames: 1631944704. Throughput: 0: 11082.0. Samples: 157972344. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:33,976][24592] Avg episode reward: [(0, '4.933')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:35,593][626795] Updated weights for policy 0, policy_version 199222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:37,309][626795] Updated weights for policy 0, policy_version 199232 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:38,976][24592] Fps is (10 sec: 47514.3, 60 sec: 45055.8, 300 sec: 43375.9). Total num frames: 1632182272. Throughput: 0: 11087.9. Samples: 158044284. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:38,980][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:39,036][626795] Updated weights for policy 0, policy_version 199242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:40,695][626795] Updated weights for policy 0, policy_version 199252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:42,442][626795] Updated weights for policy 0, policy_version 199262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:43,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45192.5, 300 sec: 43403.7). Total num frames: 1632428032. Throughput: 0: 11064.0. Samples: 158079180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:43,976][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:44,202][626795] Updated weights for policy 0, policy_version 199272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:45,886][626795] Updated weights for policy 0, policy_version 199282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:48,975][24592] Fps is (10 sec: 33588.1, 60 sec: 42734.9, 300 sec: 42903.9). Total num frames: 1632518144. Throughput: 0: 10634.7. Samples: 158131470. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:48,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:50,599][626795] Updated weights for policy 0, policy_version 199292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:52,325][626795] Updated weights for policy 0, policy_version 199302 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:53,975][24592] Fps is (10 sec: 32768.1, 60 sec: 42734.9, 300 sec: 42903.9). Total num frames: 1632755712. Throughput: 0: 10269.7. Samples: 158185914. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:53,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:54,135][626795] Updated weights for policy 0, policy_version 199312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:55,849][626795] Updated weights for policy 0, policy_version 199322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:57,584][626795] Updated weights for policy 0, policy_version 199332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:58,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42734.9, 300 sec: 43422.4). Total num frames: 1632993280. Throughput: 0: 10515.3. Samples: 158221224. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:26:58,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:26:59,340][626795] Updated weights for policy 0, policy_version 199342 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:01,111][626795] Updated weights for policy 0, policy_version 199352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:02,894][626795] Updated weights for policy 0, policy_version 199362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:03,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42734.9, 300 sec: 43459.2). Total num frames: 1633230848. Throughput: 0: 11112.9. Samples: 158292096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:03,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000199369_1633230848.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000198104_1622867968.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:04,524][626795] Updated weights for policy 0, policy_version 199372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:06,304][626795] Updated weights for policy 0, policy_version 199382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:08,030][626795] Updated weights for policy 0, policy_version 199392 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:08,976][24592] Fps is (10 sec: 46692.8, 60 sec: 42598.1, 300 sec: 43431.5). Total num frames: 1633460224. Throughput: 0: 11072.8. Samples: 158362788. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:08,977][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:09,758][626795] Updated weights for policy 0, policy_version 199402 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:11,527][626795] Updated weights for policy 0, policy_version 199412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:13,256][626795] Updated weights for policy 0, policy_version 199422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:13,976][24592] Fps is (10 sec: 46692.2, 60 sec: 45055.6, 300 sec: 43403.7). Total num frames: 1633697792. Throughput: 0: 11050.1. Samples: 158397726. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:13,978][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:14,931][626795] Updated weights for policy 0, policy_version 199432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:16,789][626795] Updated weights for policy 0, policy_version 199442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:18,434][626795] Updated weights for policy 0, policy_version 199452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:18,975][24592] Fps is (10 sec: 47516.1, 60 sec: 45192.6, 300 sec: 43403.7). Total num frames: 1633935360. Throughput: 0: 11031.8. Samples: 158468772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:18,978][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:20,137][626795] Updated weights for policy 0, policy_version 199462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:23,975][24592] Fps is (10 sec: 33588.8, 60 sec: 42734.9, 300 sec: 42931.6). Total num frames: 1634033664. Throughput: 0: 10374.5. Samples: 158511132. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:23,978][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:24,687][626795] Updated weights for policy 0, policy_version 199472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:26,426][626795] Updated weights for policy 0, policy_version 199482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:28,175][626795] Updated weights for policy 0, policy_version 199492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:28,975][24592] Fps is (10 sec: 33586.8, 60 sec: 42735.2, 300 sec: 42903.9). Total num frames: 1634271232. Throughput: 0: 10293.9. Samples: 158542404. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:28,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:29,856][626795] Updated weights for policy 0, policy_version 199502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:31,635][626795] Updated weights for policy 0, policy_version 199512 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:33,339][626795] Updated weights for policy 0, policy_version 199522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:33,976][24592] Fps is (10 sec: 47512.3, 60 sec: 42734.7, 300 sec: 43475.0). Total num frames: 1634508800. Throughput: 0: 10718.2. Samples: 158613792. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:33,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:35,046][626795] Updated weights for policy 0, policy_version 199532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:36,875][626795] Updated weights for policy 0, policy_version 199542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:38,487][626795] Updated weights for policy 0, policy_version 199552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42735.1, 300 sec: 43514.9). Total num frames: 1634746368. Throughput: 0: 11074.8. Samples: 158684280. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:38,976][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:40,351][626795] Updated weights for policy 0, policy_version 199562 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:42,109][626795] Updated weights for policy 0, policy_version 199572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:43,811][626795] Updated weights for policy 0, policy_version 199582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:43,975][24592] Fps is (10 sec: 47515.2, 60 sec: 42598.4, 300 sec: 43514.8). Total num frames: 1634983936. Throughput: 0: 11062.3. Samples: 158719026. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:43,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:45,588][626795] Updated weights for policy 0, policy_version 199592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:47,312][626795] Updated weights for policy 0, policy_version 199602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:48,940][626795] Updated weights for policy 0, policy_version 199612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:48,975][24592] Fps is (10 sec: 47512.9, 60 sec: 45055.9, 300 sec: 43487.0). Total num frames: 1635221504. Throughput: 0: 11064.0. Samples: 158789976. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:48,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:50,770][626795] Updated weights for policy 0, policy_version 199622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:52,393][626795] Updated weights for policy 0, policy_version 199632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:53,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45056.0, 300 sec: 43487.1). Total num frames: 1635459072. Throughput: 0: 11086.9. Samples: 158861694. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:53,976][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:54,118][626795] Updated weights for policy 0, policy_version 199642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:27:55,837][626795] Updated weights for policy 0, policy_version 199652 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:59,096][24592] Fps is (10 sec: 33995.5, 60 sec: 42785.2, 300 sec: 43025.3). Total num frames: 1635565568. Throughput: 0: 10282.9. Samples: 158861694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:27:59,097][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:00,475][626795] Updated weights for policy 0, policy_version 199662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:02,247][626795] Updated weights for policy 0, policy_version 199672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:03,921][626795] Updated weights for policy 0, policy_version 199682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:03,975][24592] Fps is (10 sec: 33586.7, 60 sec: 42734.9, 300 sec: 43015.0). Total num frames: 1635794944. Throughput: 0: 10340.1. Samples: 158934078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:03,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:05,703][626795] Updated weights for policy 0, policy_version 199692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:07,430][626795] Updated weights for policy 0, policy_version 199702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:08,975][24592] Fps is (10 sec: 46436.7, 60 sec: 42735.2, 300 sec: 43484.1). Total num frames: 1636024320. Throughput: 0: 10973.5. Samples: 159004938. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:08,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:09,098][626795] Updated weights for policy 0, policy_version 199712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:10,861][626795] Updated weights for policy 0, policy_version 199722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:12,584][626795] Updated weights for policy 0, policy_version 199732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:13,975][24592] Fps is (10 sec: 47514.4, 60 sec: 42871.9, 300 sec: 43542.6). Total num frames: 1636270080. Throughput: 0: 11076.0. Samples: 159040824. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:13,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:14,151][626795] Updated weights for policy 0, policy_version 199742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:15,935][626795] Updated weights for policy 0, policy_version 199752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:17,623][626795] Updated weights for policy 0, policy_version 199762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:18,975][24592] Fps is (10 sec: 48333.4, 60 sec: 42871.4, 300 sec: 43542.6). Total num frames: 1636507648. Throughput: 0: 11097.3. Samples: 159113166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:18,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:19,272][626795] Updated weights for policy 0, policy_version 199772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:21,046][626795] Updated weights for policy 0, policy_version 199782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:22,763][626795] Updated weights for policy 0, policy_version 199792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:23,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45329.1, 300 sec: 43570.3). Total num frames: 1636753408. Throughput: 0: 11143.3. Samples: 159185730. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:23,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:24,424][626795] Updated weights for policy 0, policy_version 199802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:26,162][626795] Updated weights for policy 0, policy_version 199812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:27,867][626795] Updated weights for policy 0, policy_version 199822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:28,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45329.1, 300 sec: 43542.6). Total num frames: 1636990976. Throughput: 0: 11167.5. Samples: 159221562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:28,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:29,629][626795] Updated weights for policy 0, policy_version 199832 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:34,457][24592] Fps is (10 sec: 32825.5, 60 sec: 42801.2, 300 sec: 43028.0). Total num frames: 1637097472. Throughput: 0: 10263.7. Samples: 159256782. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:34,458][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:34,577][626795] Updated weights for policy 0, policy_version 199842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:36,362][626795] Updated weights for policy 0, policy_version 199852 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:38,048][626795] Updated weights for policy 0, policy_version 199862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:38,976][24592] Fps is (10 sec: 31946.6, 60 sec: 42734.5, 300 sec: 42987.1). Total num frames: 1637310464. Throughput: 0: 10298.4. Samples: 159325128. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:38,977][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:39,767][626795] Updated weights for policy 0, policy_version 199872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:41,507][626795] Updated weights for policy 0, policy_version 199882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:43,232][626795] Updated weights for policy 0, policy_version 199892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:43,975][24592] Fps is (10 sec: 47335.0, 60 sec: 42734.8, 300 sec: 43042.7). Total num frames: 1637548032. Throughput: 0: 11121.5. Samples: 159360816. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:43,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:44,953][626795] Updated weights for policy 0, policy_version 199902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:46,663][626795] Updated weights for policy 0, policy_version 199912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:48,266][626795] Updated weights for policy 0, policy_version 199922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:48,975][24592] Fps is (10 sec: 48335.5, 60 sec: 42871.5, 300 sec: 43598.1). Total num frames: 1637793792. Throughput: 0: 11077.4. Samples: 159432558. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:48,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:50,097][626795] Updated weights for policy 0, policy_version 199932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:51,777][626795] Updated weights for policy 0, policy_version 199942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:53,394][626795] Updated weights for policy 0, policy_version 199952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:53,975][24592] Fps is (10 sec: 48333.5, 60 sec: 42871.5, 300 sec: 43598.1). Total num frames: 1638031360. Throughput: 0: 11113.5. Samples: 159505044. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:53,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:55,160][626795] Updated weights for policy 0, policy_version 199962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:56,871][626795] Updated weights for policy 0, policy_version 199972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:28:58,415][626795] Updated weights for policy 0, policy_version 199982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:58,975][24592] Fps is (10 sec: 48333.3, 60 sec: 45283.9, 300 sec: 43625.9). Total num frames: 1638277120. Throughput: 0: 11114.0. Samples: 159540954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:28:58,976][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:00,174][626795] Updated weights for policy 0, policy_version 199992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:01,967][626795] Updated weights for policy 0, policy_version 200002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:03,686][626795] Updated weights for policy 0, policy_version 200012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:03,976][24592] Fps is (10 sec: 47512.9, 60 sec: 45192.5, 300 sec: 43598.1). Total num frames: 1638506496. Throughput: 0: 11104.4. Samples: 159612864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:03,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000200013_1638506496.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:04,053][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000198725_1627955200.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:05,493][626795] Updated weights for policy 0, policy_version 200022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:09,832][24592] Fps is (10 sec: 32445.5, 60 sec: 42806.4, 300 sec: 43056.5). Total num frames: 1638629376. Throughput: 0: 10067.4. Samples: 159647388. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:09,833][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:10,404][626795] Updated weights for policy 0, policy_version 200032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:12,088][626795] Updated weights for policy 0, policy_version 200042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:13,835][626795] Updated weights for policy 0, policy_version 200052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:13,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42598.3, 300 sec: 43042.7). Total num frames: 1638825984. Throughput: 0: 10210.9. Samples: 159681054. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:13,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:15,609][626795] Updated weights for policy 0, policy_version 200062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:17,270][626795] Updated weights for policy 0, policy_version 200072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:18,975][24592] Fps is (10 sec: 47485.8, 60 sec: 42598.3, 300 sec: 43070.5). Total num frames: 1639063552. Throughput: 0: 11129.8. Samples: 159752262. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:18,978][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:18,985][626795] Updated weights for policy 0, policy_version 200082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:20,668][626795] Updated weights for policy 0, policy_version 200092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:22,353][626795] Updated weights for policy 0, policy_version 200102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:23,975][24592] Fps is (10 sec: 48333.5, 60 sec: 42598.4, 300 sec: 43598.1). Total num frames: 1639309312. Throughput: 0: 11107.4. Samples: 159824952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:23,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:24,123][626795] Updated weights for policy 0, policy_version 200112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:25,790][626795] Updated weights for policy 0, policy_version 200122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:27,505][626795] Updated weights for policy 0, policy_version 200132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:28,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42598.3, 300 sec: 43625.9). Total num frames: 1639546880. Throughput: 0: 11104.7. Samples: 159860526. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:28,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:29,217][626795] Updated weights for policy 0, policy_version 200142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:30,905][626795] Updated weights for policy 0, policy_version 200152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:32,689][626795] Updated weights for policy 0, policy_version 200162 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:33,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45145.3, 300 sec: 43598.2). Total num frames: 1639784448. Throughput: 0: 11116.0. Samples: 159932778. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:33,976][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:34,329][626795] Updated weights for policy 0, policy_version 200172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:36,024][626795] Updated weights for policy 0, policy_version 200182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:37,762][626795] Updated weights for policy 0, policy_version 200192 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45193.0, 300 sec: 43598.1). Total num frames: 1640022016. Throughput: 0: 11083.3. Samples: 160003794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:38,977][24592] Avg episode reward: [(0, '5.020')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:39,551][626795] Updated weights for policy 0, policy_version 200202 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:41,222][626795] Updated weights for policy 0, policy_version 200212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:45,220][24592] Fps is (10 sec: 34970.0, 60 sec: 42936.8, 300 sec: 43138.4). Total num frames: 1640177664. Throughput: 0: 10778.7. Samples: 160039410. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:45,220][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:46,168][626795] Updated weights for policy 0, policy_version 200222 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:47,940][626795] Updated weights for policy 0, policy_version 200232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:48,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42461.9, 300 sec: 43070.5). Total num frames: 1640341504. Throughput: 0: 10222.4. Samples: 160072872. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:48,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:49,672][626795] Updated weights for policy 0, policy_version 200242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:51,367][626795] Updated weights for policy 0, policy_version 200252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:52,938][626795] Updated weights for policy 0, policy_version 200262 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:53,975][24592] Fps is (10 sec: 45845.8, 60 sec: 42461.9, 300 sec: 43070.5). Total num frames: 1640579072. Throughput: 0: 11264.2. Samples: 160144626. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:53,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:54,797][626795] Updated weights for policy 0, policy_version 200272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:56,571][626795] Updated weights for policy 0, policy_version 200282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:58,230][626795] Updated weights for policy 0, policy_version 200292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:58,975][24592] Fps is (10 sec: 48332.3, 60 sec: 42461.8, 300 sec: 43625.9). Total num frames: 1640824832. Throughput: 0: 11088.1. Samples: 160180020. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:29:58,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:29:59,907][626795] Updated weights for policy 0, policy_version 200302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:01,644][626795] Updated weights for policy 0, policy_version 200312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:03,325][626795] Updated weights for policy 0, policy_version 200322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:03,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42598.5, 300 sec: 43625.9). Total num frames: 1641062400. Throughput: 0: 11115.9. Samples: 160252476. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:03,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:04,996][626795] Updated weights for policy 0, policy_version 200332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:06,681][626795] Updated weights for policy 0, policy_version 200342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:08,393][626795] Updated weights for policy 0, policy_version 200352 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:08,975][24592] Fps is (10 sec: 48333.4, 60 sec: 45293.2, 300 sec: 43653.6). Total num frames: 1641308160. Throughput: 0: 11109.5. Samples: 160324878. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:08,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:10,082][626795] Updated weights for policy 0, policy_version 200362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:11,893][626795] Updated weights for policy 0, policy_version 200372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:13,740][626795] Updated weights for policy 0, policy_version 200382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:13,975][24592] Fps is (10 sec: 48332.2, 60 sec: 45329.1, 300 sec: 43625.9). Total num frames: 1641545728. Throughput: 0: 11114.5. Samples: 160360680. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:13,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:15,427][626795] Updated weights for policy 0, policy_version 200392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:17,131][626795] Updated weights for policy 0, policy_version 200402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:20,621][24592] Fps is (10 sec: 33763.6, 60 sec: 42789.9, 300 sec: 43080.1). Total num frames: 1641701376. Throughput: 0: 9899.7. Samples: 160394562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:20,623][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:22,065][626795] Updated weights for policy 0, policy_version 200412 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:23,752][626795] Updated weights for policy 0, policy_version 200422 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:23,990][24592] Fps is (10 sec: 31085.7, 60 sec: 42451.8, 300 sec: 43040.7). Total num frames: 1641857024. Throughput: 0: 10203.1. Samples: 160463076. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:23,990][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:25,447][626795] Updated weights for policy 0, policy_version 200432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:27,195][626795] Updated weights for policy 0, policy_version 200442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:28,880][626795] Updated weights for policy 0, policy_version 200452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:28,976][24592] Fps is (10 sec: 48048.8, 60 sec: 42598.3, 300 sec: 43070.4). Total num frames: 1642102784. Throughput: 0: 10511.7. Samples: 160499358. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:28,979][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:30,555][626795] Updated weights for policy 0, policy_version 200462 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:32,248][626795] Updated weights for policy 0, policy_version 200472 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:33,974][626795] Updated weights for policy 0, policy_version 200482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:33,975][24592] Fps is (10 sec: 49222.3, 60 sec: 42735.0, 300 sec: 43625.9). Total num frames: 1642348544. Throughput: 0: 11093.3. Samples: 160572072. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:33,978][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:35,668][626795] Updated weights for policy 0, policy_version 200492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:37,325][626795] Updated weights for policy 0, policy_version 200502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:38,975][24592] Fps is (10 sec: 48334.4, 60 sec: 42735.0, 300 sec: 43625.9). Total num frames: 1642586112. Throughput: 0: 11104.5. Samples: 160644330. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:38,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:39,028][626795] Updated weights for policy 0, policy_version 200512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:40,721][626795] Updated weights for policy 0, policy_version 200522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:41,365][626772] Signal inference workers to stop experience collection... (2200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:41,371][626772] Signal inference workers to resume experience collection... (2200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:41,382][626795] InferenceWorker_p0-w0: stopping experience collection (2200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:41,385][626795] InferenceWorker_p0-w0: resuming experience collection (2200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:42,649][626795] Updated weights for policy 0, policy_version 200532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45034.3, 300 sec: 43625.9). Total num frames: 1642823680. Throughput: 0: 11112.8. Samples: 160680096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:43,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:44,188][626795] Updated weights for policy 0, policy_version 200542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:45,892][626795] Updated weights for policy 0, policy_version 200552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:47,727][626795] Updated weights for policy 0, policy_version 200562 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:48,975][24592] Fps is (10 sec: 47512.7, 60 sec: 45329.0, 300 sec: 43625.9). Total num frames: 1643061248. Throughput: 0: 11081.3. Samples: 160751136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:48,976][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:49,505][626795] Updated weights for policy 0, policy_version 200572 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:51,115][626795] Updated weights for policy 0, policy_version 200582 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:56,031][24592] Fps is (10 sec: 34654.5, 60 sec: 42903.2, 300 sec: 43130.9). Total num frames: 1643241472. Throughput: 0: 9802.8. Samples: 160786158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:56,032][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:56,141][626795] Updated weights for policy 0, policy_version 200592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:57,914][626795] Updated weights for policy 0, policy_version 200602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:58,975][24592] Fps is (10 sec: 31949.2, 60 sec: 42598.5, 300 sec: 43098.2). Total num frames: 1643380736. Throughput: 0: 10178.2. Samples: 160818696. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:30:58,978][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:30:59,622][626795] Updated weights for policy 0, policy_version 200612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:01,327][626795] Updated weights for policy 0, policy_version 200622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:03,051][626795] Updated weights for policy 0, policy_version 200632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:03,975][24592] Fps is (10 sec: 47434.5, 60 sec: 42598.3, 300 sec: 43098.2). Total num frames: 1643618304. Throughput: 0: 11430.6. Samples: 160890126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:03,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000200638_1643626496.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:04,069][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000199369_1633230848.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:04,686][626795] Updated weights for policy 0, policy_version 200642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:06,515][626795] Updated weights for policy 0, policy_version 200652 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:08,262][626795] Updated weights for policy 0, policy_version 200662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:08,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42461.9, 300 sec: 43598.1). Total num frames: 1643855872. Throughput: 0: 11064.0. Samples: 160960800. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:08,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:09,974][626795] Updated weights for policy 0, policy_version 200672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:11,723][626795] Updated weights for policy 0, policy_version 200682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:13,408][626795] Updated weights for policy 0, policy_version 200692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:13,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42461.9, 300 sec: 43625.9). Total num frames: 1644093440. Throughput: 0: 11061.2. Samples: 160997112. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:13,977][24592] Avg episode reward: [(0, '5.014')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:15,074][626795] Updated weights for policy 0, policy_version 200702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:16,869][626795] Updated weights for policy 0, policy_version 200712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:18,475][626795] Updated weights for policy 0, policy_version 200722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:18,975][24592] Fps is (10 sec: 47513.2, 60 sec: 45063.5, 300 sec: 43598.1). Total num frames: 1644331008. Throughput: 0: 11036.2. Samples: 161068704. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:18,976][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:20,235][626795] Updated weights for policy 0, policy_version 200732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:21,998][626795] Updated weights for policy 0, policy_version 200742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:23,852][626795] Updated weights for policy 0, policy_version 200752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:23,976][24592] Fps is (10 sec: 47512.3, 60 sec: 45203.0, 300 sec: 43598.1). Total num frames: 1644568576. Throughput: 0: 11001.1. Samples: 161139384. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:23,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:25,415][626795] Updated weights for policy 0, policy_version 200762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:27,185][626795] Updated weights for policy 0, policy_version 200772 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:31,429][24592] Fps is (10 sec: 34863.4, 60 sec: 42630.2, 300 sec: 43100.8). Total num frames: 1644765184. Throughput: 0: 10426.3. Samples: 161174862. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:31,430][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:32,153][626795] Updated weights for policy 0, policy_version 200782 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:33,862][626795] Updated weights for policy 0, policy_version 200792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:33,976][24592] Fps is (10 sec: 31946.5, 60 sec: 42324.5, 300 sec: 43070.4). Total num frames: 1644888064. Throughput: 0: 10166.7. Samples: 161208648. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:33,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:35,529][626795] Updated weights for policy 0, policy_version 200802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:37,306][626795] Updated weights for policy 0, policy_version 200812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:38,970][626795] Updated weights for policy 0, policy_version 200822 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:38,975][24592] Fps is (10 sec: 48850.3, 60 sec: 42461.8, 300 sec: 43070.5). Total num frames: 1645133824. Throughput: 0: 11500.5. Samples: 161280036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:38,977][24592] Avg episode reward: [(0, '4.917')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:40,783][626795] Updated weights for policy 0, policy_version 200832 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:42,415][626795] Updated weights for policy 0, policy_version 200842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:43,978][24592] Fps is (10 sec: 47506.9, 60 sec: 42323.5, 300 sec: 43542.2). Total num frames: 1645363200. Throughput: 0: 11039.4. Samples: 161315496. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:43,980][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:44,175][626795] Updated weights for policy 0, policy_version 200852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:45,916][626795] Updated weights for policy 0, policy_version 200862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:47,681][626795] Updated weights for policy 0, policy_version 200872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:48,975][24592] Fps is (10 sec: 46694.6, 60 sec: 42325.5, 300 sec: 43542.6). Total num frames: 1645600768. Throughput: 0: 11031.3. Samples: 161386530. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:48,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:49,359][626795] Updated weights for policy 0, policy_version 200882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:51,090][626795] Updated weights for policy 0, policy_version 200892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:52,829][626795] Updated weights for policy 0, policy_version 200902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:53,975][24592] Fps is (10 sec: 47525.3, 60 sec: 44816.7, 300 sec: 43542.6). Total num frames: 1645838336. Throughput: 0: 11032.5. Samples: 161457264. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:53,976][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:54,593][626795] Updated weights for policy 0, policy_version 200912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:56,414][626795] Updated weights for policy 0, policy_version 200922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:57,998][626795] Updated weights for policy 0, policy_version 200932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:58,975][24592] Fps is (10 sec: 47512.7, 60 sec: 44919.4, 300 sec: 43542.6). Total num frames: 1646075904. Throughput: 0: 11020.5. Samples: 161493036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:31:58,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:31:59,718][626795] Updated weights for policy 0, policy_version 200942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:01,503][626795] Updated weights for policy 0, policy_version 200952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:03,250][626795] Updated weights for policy 0, policy_version 200962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:06,847][24592] Fps is (10 sec: 35640.4, 60 sec: 42607.2, 300 sec: 43095.3). Total num frames: 1646297088. Throughput: 0: 10344.2. Samples: 161563896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:06,849][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:08,159][626795] Updated weights for policy 0, policy_version 200972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:08,975][24592] Fps is (10 sec: 31949.2, 60 sec: 42325.3, 300 sec: 43042.8). Total num frames: 1646395392. Throughput: 0: 10181.0. Samples: 161597526. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:08,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:09,914][626795] Updated weights for policy 0, policy_version 200982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:11,620][626795] Updated weights for policy 0, policy_version 200992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:13,351][626795] Updated weights for policy 0, policy_version 201002 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:13,975][24592] Fps is (10 sec: 47118.4, 60 sec: 42325.4, 300 sec: 43042.7). Total num frames: 1646632960. Throughput: 0: 10754.0. Samples: 161632404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:13,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:15,142][626795] Updated weights for policy 0, policy_version 201012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:16,857][626795] Updated weights for policy 0, policy_version 201022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:18,457][626795] Updated weights for policy 0, policy_version 201032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:18,975][24592] Fps is (10 sec: 47513.9, 60 sec: 42325.4, 300 sec: 43514.8). Total num frames: 1646870528. Throughput: 0: 10995.3. Samples: 161703426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:18,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:20,326][626795] Updated weights for policy 0, policy_version 201042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:21,987][626795] Updated weights for policy 0, policy_version 201052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:23,710][626795] Updated weights for policy 0, policy_version 201062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:23,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.6, 300 sec: 43514.8). Total num frames: 1647108096. Throughput: 0: 10998.4. Samples: 161774964. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:23,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:25,411][626795] Updated weights for policy 0, policy_version 201072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:27,090][626795] Updated weights for policy 0, policy_version 201082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:28,850][626795] Updated weights for policy 0, policy_version 201092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:28,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44841.8, 300 sec: 43514.8). Total num frames: 1647345664. Throughput: 0: 11005.7. Samples: 161810724. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:28,976][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:30,596][626795] Updated weights for policy 0, policy_version 201102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:32,185][626795] Updated weights for policy 0, policy_version 201112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:33,976][24592] Fps is (10 sec: 47512.5, 60 sec: 44920.1, 300 sec: 43514.8). Total num frames: 1647583232. Throughput: 0: 11007.1. Samples: 161881854. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:33,977][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:34,072][626795] Updated weights for policy 0, policy_version 201122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:35,702][626795] Updated weights for policy 0, policy_version 201132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:37,496][626795] Updated weights for policy 0, policy_version 201142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44782.9, 300 sec: 43514.8). Total num frames: 1647820800. Throughput: 0: 11011.3. Samples: 161952774. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:38,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:42,440][626795] Updated weights for policy 0, policy_version 201152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:43,975][24592] Fps is (10 sec: 31949.4, 60 sec: 42327.1, 300 sec: 42987.2). Total num frames: 1647902720. Throughput: 0: 10346.4. Samples: 161958624. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:43,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:44,237][626795] Updated weights for policy 0, policy_version 201162 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:45,998][626795] Updated weights for policy 0, policy_version 201172 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:47,695][626795] Updated weights for policy 0, policy_version 201182 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:48,976][24592] Fps is (10 sec: 31947.3, 60 sec: 42325.0, 300 sec: 42987.1). Total num frames: 1648140288. Throughput: 0: 10844.2. Samples: 162020748. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:48,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:49,476][626795] Updated weights for policy 0, policy_version 201192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:51,248][626795] Updated weights for policy 0, policy_version 201202 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:52,939][626795] Updated weights for policy 0, policy_version 201212 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:53,976][24592] Fps is (10 sec: 46692.8, 60 sec: 42188.6, 300 sec: 43421.5). Total num frames: 1648369664. Throughput: 0: 10970.6. Samples: 162091206. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:53,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:54,707][626795] Updated weights for policy 0, policy_version 201222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:56,441][626795] Updated weights for policy 0, policy_version 201232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:58,054][626795] Updated weights for policy 0, policy_version 201242 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:58,975][24592] Fps is (10 sec: 47515.8, 60 sec: 42325.4, 300 sec: 43459.3). Total num frames: 1648615424. Throughput: 0: 10979.5. Samples: 162126480. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:32:58,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:32:59,906][626795] Updated weights for policy 0, policy_version 201252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:01,557][626795] Updated weights for policy 0, policy_version 201262 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:03,241][626795] Updated weights for policy 0, policy_version 201272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:03,975][24592] Fps is (10 sec: 48334.8, 60 sec: 44739.8, 300 sec: 43487.0). Total num frames: 1648852992. Throughput: 0: 11003.2. Samples: 162198570. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:03,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000201276_1648852992.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:04,083][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000200013_1638506496.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:05,030][626795] Updated weights for policy 0, policy_version 201282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:06,708][626795] Updated weights for policy 0, policy_version 201292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:08,454][626795] Updated weights for policy 0, policy_version 201302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:08,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44919.5, 300 sec: 43459.3). Total num frames: 1649090560. Throughput: 0: 11001.6. Samples: 162270036. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:08,977][24592] Avg episode reward: [(0, '5.063')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:10,167][626795] Updated weights for policy 0, policy_version 201312 (0.0034)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:11,814][626795] Updated weights for policy 0, policy_version 201322 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:13,560][626795] Updated weights for policy 0, policy_version 201332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:13,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44919.5, 300 sec: 43459.3). Total num frames: 1649328128. Throughput: 0: 10994.5. Samples: 162305478. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:13,976][24592] Avg episode reward: [(0, '4.950')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:18,585][626795] Updated weights for policy 0, policy_version 201342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:18,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42325.3, 300 sec: 42903.9). Total num frames: 1649410048. Throughput: 0: 10183.6. Samples: 162340116. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:18,976][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:20,384][626795] Updated weights for policy 0, policy_version 201352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:22,175][626795] Updated weights for policy 0, policy_version 201362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:23,975][24592] Fps is (10 sec: 30310.0, 60 sec: 42052.2, 300 sec: 42848.3). Total num frames: 1649631232. Throughput: 0: 10091.4. Samples: 162406890. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:23,977][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:24,020][626795] Updated weights for policy 0, policy_version 201372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:25,777][626795] Updated weights for policy 0, policy_version 201382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:27,618][626795] Updated weights for policy 0, policy_version 201392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:28,975][24592] Fps is (10 sec: 45056.1, 60 sec: 41915.7, 300 sec: 43335.6). Total num frames: 1649860608. Throughput: 0: 10703.1. Samples: 162440262. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:28,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:29,348][626795] Updated weights for policy 0, policy_version 201402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:31,039][626795] Updated weights for policy 0, policy_version 201412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:32,783][626795] Updated weights for policy 0, policy_version 201422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:33,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42052.4, 300 sec: 43376.0). Total num frames: 1650106368. Throughput: 0: 10904.6. Samples: 162511452. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:33,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:34,512][626795] Updated weights for policy 0, policy_version 201432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:36,149][626795] Updated weights for policy 0, policy_version 201442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:37,874][626795] Updated weights for policy 0, policy_version 201452 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:38,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42052.3, 300 sec: 43376.0). Total num frames: 1650343936. Throughput: 0: 10948.5. Samples: 162583884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:38,976][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:39,554][626795] Updated weights for policy 0, policy_version 201462 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:41,335][626795] Updated weights for policy 0, policy_version 201472 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:42,974][626795] Updated weights for policy 0, policy_version 201482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:43,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44646.4, 300 sec: 43348.2). Total num frames: 1650581504. Throughput: 0: 10952.0. Samples: 162619320. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:43,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:44,727][626795] Updated weights for policy 0, policy_version 201492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:46,461][626795] Updated weights for policy 0, policy_version 201502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:48,093][626795] Updated weights for policy 0, policy_version 201512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:48,975][24592] Fps is (10 sec: 48333.0, 60 sec: 44783.3, 300 sec: 43376.0). Total num frames: 1650827264. Throughput: 0: 10959.7. Samples: 162691758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:48,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:49,791][626795] Updated weights for policy 0, policy_version 201522 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:53,977][24592] Fps is (10 sec: 33582.5, 60 sec: 42461.1, 300 sec: 42848.1). Total num frames: 1650917376. Throughput: 0: 10139.7. Samples: 162726336. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:53,979][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:54,670][626795] Updated weights for policy 0, policy_version 201532 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:56,431][626795] Updated weights for policy 0, policy_version 201542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:33:58,214][626795] Updated weights for policy 0, policy_version 201552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:58,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42188.8, 300 sec: 42848.3). Total num frames: 1651146752. Throughput: 0: 10125.7. Samples: 162761136. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:33:58,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:00,012][626795] Updated weights for policy 0, policy_version 201562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:01,877][626795] Updated weights for policy 0, policy_version 201572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:03,456][626795] Updated weights for policy 0, policy_version 201582 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:03,976][24592] Fps is (10 sec: 45880.3, 60 sec: 42052.0, 300 sec: 43335.1). Total num frames: 1651376128. Throughput: 0: 10883.7. Samples: 162829884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:03,977][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:05,273][626795] Updated weights for policy 0, policy_version 201592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:06,974][626795] Updated weights for policy 0, policy_version 201602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:08,613][626795] Updated weights for policy 0, policy_version 201612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:08,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42188.7, 300 sec: 43376.0). Total num frames: 1651621888. Throughput: 0: 11009.6. Samples: 162902322. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:08,977][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:10,366][626795] Updated weights for policy 0, policy_version 201622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:12,011][626795] Updated weights for policy 0, policy_version 201632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:13,701][626795] Updated weights for policy 0, policy_version 201642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:13,975][24592] Fps is (10 sec: 48334.7, 60 sec: 42188.8, 300 sec: 43376.0). Total num frames: 1651859456. Throughput: 0: 11060.3. Samples: 162937974. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:13,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:15,457][626795] Updated weights for policy 0, policy_version 201652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:17,226][626795] Updated weights for policy 0, policy_version 201662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:18,920][626795] Updated weights for policy 0, policy_version 201672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:18,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44782.9, 300 sec: 43348.2). Total num frames: 1652097024. Throughput: 0: 11065.8. Samples: 163009410. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:18,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:20,633][626795] Updated weights for policy 0, policy_version 201682 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:22,292][626795] Updated weights for policy 0, policy_version 201692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:23,975][24592] Fps is (10 sec: 46693.7, 60 sec: 44919.5, 300 sec: 43320.4). Total num frames: 1652326400. Throughput: 0: 11047.3. Samples: 163081014. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:23,979][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:24,164][626795] Updated weights for policy 0, policy_version 201702 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:28,976][24592] Fps is (10 sec: 31948.4, 60 sec: 42598.3, 300 sec: 42820.5). Total num frames: 1652416512. Throughput: 0: 10725.1. Samples: 163101948. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:28,978][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:29,089][626795] Updated weights for policy 0, policy_version 201712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:30,836][626795] Updated weights for policy 0, policy_version 201722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:32,611][626795] Updated weights for policy 0, policy_version 201732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:33,975][24592] Fps is (10 sec: 31129.8, 60 sec: 42188.8, 300 sec: 42765.0). Total num frames: 1652637696. Throughput: 0: 10118.1. Samples: 163147074. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:33,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:34,579][626795] Updated weights for policy 0, policy_version 201742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:36,336][626795] Updated weights for policy 0, policy_version 201752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:37,951][626795] Updated weights for policy 0, policy_version 201762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:38,989][24592] Fps is (10 sec: 45814.4, 60 sec: 42179.4, 300 sec: 43223.1). Total num frames: 1652875264. Throughput: 0: 10909.6. Samples: 163217400. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:38,990][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:39,676][626795] Updated weights for policy 0, policy_version 201772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:41,318][626795] Updated weights for policy 0, policy_version 201782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:43,199][626795] Updated weights for policy 0, policy_version 201792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:43,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42325.4, 300 sec: 43320.4). Total num frames: 1653121024. Throughput: 0: 10929.1. Samples: 163252944. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:43,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:44,700][626795] Updated weights for policy 0, policy_version 201802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:46,494][626795] Updated weights for policy 0, policy_version 201812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:48,243][626795] Updated weights for policy 0, policy_version 201822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:48,975][24592] Fps is (10 sec: 48397.5, 60 sec: 42188.8, 300 sec: 43320.4). Total num frames: 1653358592. Throughput: 0: 11013.5. Samples: 163325490. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:48,976][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:49,865][626795] Updated weights for policy 0, policy_version 201832 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:51,718][626795] Updated weights for policy 0, policy_version 201842 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:53,502][626795] Updated weights for policy 0, policy_version 201852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:53,975][24592] Fps is (10 sec: 46694.3, 60 sec: 44510.9, 300 sec: 43264.9). Total num frames: 1653587968. Throughput: 0: 10960.0. Samples: 163395522. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:53,976][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:55,201][626795] Updated weights for policy 0, policy_version 201862 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:56,845][626795] Updated weights for policy 0, policy_version 201872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:34:58,670][626795] Updated weights for policy 0, policy_version 201882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:58,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44782.9, 300 sec: 43292.6). Total num frames: 1653833728. Throughput: 0: 10970.4. Samples: 163431642. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:34:58,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:00,346][626795] Updated weights for policy 0, policy_version 201892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:03,975][24592] Fps is (10 sec: 33587.2, 60 sec: 42462.1, 300 sec: 42765.0). Total num frames: 1653923840. Throughput: 0: 10501.9. Samples: 163481994. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:03,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000201895_1653923840.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:04,063][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000200638_1643626496.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:05,246][626795] Updated weights for policy 0, policy_version 201902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:07,050][626795] Updated weights for policy 0, policy_version 201912 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:08,872][626795] Updated weights for policy 0, policy_version 201922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:08,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42052.3, 300 sec: 42709.5). Total num frames: 1654145024. Throughput: 0: 10075.4. Samples: 163534404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:08,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:10,622][626795] Updated weights for policy 0, policy_version 201932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:12,457][626795] Updated weights for policy 0, policy_version 201942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:13,975][24592] Fps is (10 sec: 45875.1, 60 sec: 42052.2, 300 sec: 43228.4). Total num frames: 1654382592. Throughput: 0: 10401.6. Samples: 163570020. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:13,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:14,003][626795] Updated weights for policy 0, policy_version 201952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:15,772][626795] Updated weights for policy 0, policy_version 201962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:17,483][626795] Updated weights for policy 0, policy_version 201972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:18,976][24592] Fps is (10 sec: 47512.7, 60 sec: 42052.1, 300 sec: 43266.9). Total num frames: 1654620160. Throughput: 0: 10981.0. Samples: 163641222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:18,977][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:19,268][626795] Updated weights for policy 0, policy_version 201982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:20,860][626795] Updated weights for policy 0, policy_version 201992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:22,594][626795] Updated weights for policy 0, policy_version 202002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:23,976][24592] Fps is (10 sec: 48330.5, 60 sec: 42325.1, 300 sec: 43264.8). Total num frames: 1654865920. Throughput: 0: 11017.4. Samples: 163713042. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:23,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:24,347][626795] Updated weights for policy 0, policy_version 202012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:26,080][626795] Updated weights for policy 0, policy_version 202022 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:27,675][626795] Updated weights for policy 0, policy_version 202032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:28,975][24592] Fps is (10 sec: 48333.9, 60 sec: 44783.0, 300 sec: 43237.1). Total num frames: 1655103488. Throughput: 0: 11029.6. Samples: 163749276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:28,976][24592] Avg episode reward: [(0, '5.018')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:29,430][626795] Updated weights for policy 0, policy_version 202042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:31,112][626795] Updated weights for policy 0, policy_version 202052 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:32,889][626795] Updated weights for policy 0, policy_version 202062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:33,975][24592] Fps is (10 sec: 48334.7, 60 sec: 45192.5, 300 sec: 43264.8). Total num frames: 1655349248. Throughput: 0: 11023.6. Samples: 163821552. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:33,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:34,475][626795] Updated weights for policy 0, policy_version 202072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:39,247][24592] Fps is (10 sec: 33497.9, 60 sec: 42688.0, 300 sec: 42753.5). Total num frames: 1655447552. Throughput: 0: 10212.3. Samples: 163857846. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:39,248][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:39,359][626795] Updated weights for policy 0, policy_version 202082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:41,119][626795] Updated weights for policy 0, policy_version 202092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:42,981][626795] Updated weights for policy 0, policy_version 202102 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:43,975][24592] Fps is (10 sec: 31129.8, 60 sec: 42325.3, 300 sec: 42709.5). Total num frames: 1655660544. Throughput: 0: 10203.3. Samples: 163890792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:43,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:44,866][626795] Updated weights for policy 0, policy_version 202112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:46,629][626795] Updated weights for policy 0, policy_version 202122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:48,375][626795] Updated weights for policy 0, policy_version 202132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:48,976][24592] Fps is (10 sec: 44627.1, 60 sec: 42052.2, 300 sec: 43149.0). Total num frames: 1655881728. Throughput: 0: 10580.0. Samples: 163958094. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:48,977][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:50,117][626795] Updated weights for policy 0, policy_version 202142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:51,877][626795] Updated weights for policy 0, policy_version 202152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:53,722][626795] Updated weights for policy 0, policy_version 202162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:53,976][24592] Fps is (10 sec: 46693.6, 60 sec: 42325.2, 300 sec: 43209.3). Total num frames: 1656127488. Throughput: 0: 10983.6. Samples: 164028666. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:53,977][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:55,307][626795] Updated weights for policy 0, policy_version 202172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:57,130][626795] Updated weights for policy 0, policy_version 202182 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:35:58,754][626795] Updated weights for policy 0, policy_version 202192 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:58,981][24592] Fps is (10 sec: 48304.0, 60 sec: 42184.5, 300 sec: 43208.5). Total num frames: 1656365056. Throughput: 0: 10968.8. Samples: 164063682. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:35:58,986][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:00,545][626795] Updated weights for policy 0, policy_version 202202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:02,143][626795] Updated weights for policy 0, policy_version 202212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:03,818][626795] Updated weights for policy 0, policy_version 202222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:03,975][24592] Fps is (10 sec: 47514.6, 60 sec: 44646.4, 300 sec: 43209.3). Total num frames: 1656602624. Throughput: 0: 10993.3. Samples: 164135916. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:03,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:05,576][626795] Updated weights for policy 0, policy_version 202232 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:07,338][626795] Updated weights for policy 0, policy_version 202242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:08,967][626795] Updated weights for policy 0, policy_version 202252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:08,976][24592] Fps is (10 sec: 48360.5, 60 sec: 45055.7, 300 sec: 43237.1). Total num frames: 1656848384. Throughput: 0: 11024.8. Samples: 164209158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:08,976][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:10,574][626795] Updated weights for policy 0, policy_version 202262 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:14,459][24592] Fps is (10 sec: 34380.8, 60 sec: 42663.9, 300 sec: 42750.4). Total num frames: 1656963072. Throughput: 0: 10110.8. Samples: 164209158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:14,460][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:15,345][626795] Updated weights for policy 0, policy_version 202272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:17,037][626795] Updated weights for policy 0, policy_version 202282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:18,753][626795] Updated weights for policy 0, policy_version 202292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:18,976][24592] Fps is (10 sec: 33587.3, 60 sec: 42734.8, 300 sec: 42765.0). Total num frames: 1657184256. Throughput: 0: 10213.9. Samples: 164281182. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:18,984][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:20,542][626795] Updated weights for policy 0, policy_version 202302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:22,284][626795] Updated weights for policy 0, policy_version 202312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:23,927][626795] Updated weights for policy 0, policy_version 202322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:23,975][24592] Fps is (10 sec: 48208.1, 60 sec: 42598.7, 300 sec: 43263.7). Total num frames: 1657421824. Throughput: 0: 11046.3. Samples: 164351934. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:23,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:25,731][626795] Updated weights for policy 0, policy_version 202332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:27,370][626795] Updated weights for policy 0, policy_version 202342 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:28,975][24592] Fps is (10 sec: 47514.8, 60 sec: 42598.3, 300 sec: 43292.8). Total num frames: 1657659392. Throughput: 0: 11057.7. Samples: 164388390. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:28,978][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:29,015][626795] Updated weights for policy 0, policy_version 202352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:30,800][626795] Updated weights for policy 0, policy_version 202362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:32,464][626795] Updated weights for policy 0, policy_version 202372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:33,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42461.9, 300 sec: 43264.9). Total num frames: 1657896960. Throughput: 0: 11167.5. Samples: 164460630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:33,976][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:34,136][626795] Updated weights for policy 0, policy_version 202382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:35,833][626795] Updated weights for policy 0, policy_version 202392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:37,553][626795] Updated weights for policy 0, policy_version 202402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:38,975][24592] Fps is (10 sec: 48333.5, 60 sec: 45123.4, 300 sec: 43320.8). Total num frames: 1658142720. Throughput: 0: 11213.1. Samples: 164533254. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:38,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:39,251][626795] Updated weights for policy 0, policy_version 202412 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:40,978][626795] Updated weights for policy 0, policy_version 202422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:42,684][626795] Updated weights for policy 0, policy_version 202432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:43,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45329.0, 300 sec: 43320.4). Total num frames: 1658380288. Throughput: 0: 11232.7. Samples: 164569086. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:43,976][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:44,383][626795] Updated weights for policy 0, policy_version 202442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:46,058][626795] Updated weights for policy 0, policy_version 202452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:50,113][24592] Fps is (10 sec: 33099.0, 60 sec: 43011.9, 300 sec: 42794.4). Total num frames: 1658511360. Throughput: 0: 10174.7. Samples: 164605350. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:50,115][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:51,232][626772] Signal inference workers to stop experience collection... (2250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:51,233][626772] Signal inference workers to resume experience collection... (2250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:51,241][626795] InferenceWorker_p0-w0: stopping experience collection (2250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:51,248][626795] InferenceWorker_p0-w0: resuming experience collection (2250 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:51,276][626795] Updated weights for policy 0, policy_version 202462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:53,048][626795] Updated weights for policy 0, policy_version 202472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:53,975][24592] Fps is (10 sec: 30310.7, 60 sec: 42598.6, 300 sec: 42737.3). Total num frames: 1658683392. Throughput: 0: 10220.6. Samples: 164669082. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:53,978][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:55,018][626795] Updated weights for policy 0, policy_version 202482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:56,890][626795] Updated weights for policy 0, policy_version 202492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:36:58,714][626795] Updated weights for policy 0, policy_version 202502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:58,975][24592] Fps is (10 sec: 44368.3, 60 sec: 42329.7, 300 sec: 43157.4). Total num frames: 1658904576. Throughput: 0: 11063.5. Samples: 164701662. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:36:58,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:00,403][626795] Updated weights for policy 0, policy_version 202512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:02,037][626795] Updated weights for policy 0, policy_version 202522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:03,786][626795] Updated weights for policy 0, policy_version 202532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:03,975][24592] Fps is (10 sec: 46694.2, 60 sec: 42461.8, 300 sec: 43237.1). Total num frames: 1659150336. Throughput: 0: 10905.2. Samples: 164771910. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:03,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000202533_1659150336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:04,041][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000201276_1648852992.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:05,493][626795] Updated weights for policy 0, policy_version 202542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:07,264][626795] Updated weights for policy 0, policy_version 202552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:08,851][626795] Updated weights for policy 0, policy_version 202562 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:08,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42325.5, 300 sec: 43237.1). Total num frames: 1659387904. Throughput: 0: 10950.4. Samples: 164844702. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:08,976][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:10,538][626795] Updated weights for policy 0, policy_version 202572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:12,254][626795] Updated weights for policy 0, policy_version 202582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:13,923][626795] Updated weights for policy 0, policy_version 202592 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:13,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44871.8, 300 sec: 43264.9). Total num frames: 1659633664. Throughput: 0: 10948.7. Samples: 164881080. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:13,978][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:15,632][626795] Updated weights for policy 0, policy_version 202602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:17,381][626795] Updated weights for policy 0, policy_version 202612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:18,958][626795] Updated weights for policy 0, policy_version 202622 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:18,975][24592] Fps is (10 sec: 49152.2, 60 sec: 44919.7, 300 sec: 43292.6). Total num frames: 1659879424. Throughput: 0: 10965.9. Samples: 164954094. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:18,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:20,806][626795] Updated weights for policy 0, policy_version 202632 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:25,185][24592] Fps is (10 sec: 35808.4, 60 sec: 42693.3, 300 sec: 42839.2). Total num frames: 1660035072. Throughput: 0: 9876.3. Samples: 164989638. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:25,187][24592] Avg episode reward: [(0, '5.088')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:25,249][626795] Updated weights for policy 0, policy_version 202642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:27,008][626795] Updated weights for policy 0, policy_version 202652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:28,709][626795] Updated weights for policy 0, policy_version 202662 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:28,975][24592] Fps is (10 sec: 33587.4, 60 sec: 42598.5, 300 sec: 42820.6). Total num frames: 1660215296. Throughput: 0: 10192.1. Samples: 165027732. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:28,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:30,451][626795] Updated weights for policy 0, policy_version 202672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:32,096][626795] Updated weights for policy 0, policy_version 202682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:33,858][626795] Updated weights for policy 0, policy_version 202692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:33,976][24592] Fps is (10 sec: 47527.6, 60 sec: 42598.2, 300 sec: 42820.5). Total num frames: 1660452864. Throughput: 0: 11250.6. Samples: 165098832. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:33,976][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:35,575][626795] Updated weights for policy 0, policy_version 202702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:37,246][626795] Updated weights for policy 0, policy_version 202712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:38,914][626795] Updated weights for policy 0, policy_version 202722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:38,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42598.4, 300 sec: 43375.9). Total num frames: 1660698624. Throughput: 0: 11169.5. Samples: 165171708. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:38,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:40,616][626795] Updated weights for policy 0, policy_version 202732 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:42,353][626795] Updated weights for policy 0, policy_version 202742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:43,975][24592] Fps is (10 sec: 48334.2, 60 sec: 42598.4, 300 sec: 43376.0). Total num frames: 1660936192. Throughput: 0: 11247.6. Samples: 165207804. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:43,977][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:44,097][626795] Updated weights for policy 0, policy_version 202752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:45,728][626795] Updated weights for policy 0, policy_version 202762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:47,489][626795] Updated weights for policy 0, policy_version 202772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:48,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45370.0, 300 sec: 43431.5). Total num frames: 1661181952. Throughput: 0: 11290.9. Samples: 165280002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:48,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:49,122][626795] Updated weights for policy 0, policy_version 202782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:50,889][626795] Updated weights for policy 0, policy_version 202792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:52,634][626795] Updated weights for policy 0, policy_version 202802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:53,976][24592] Fps is (10 sec: 47513.3, 60 sec: 45465.5, 300 sec: 43375.9). Total num frames: 1661411328. Throughput: 0: 11265.5. Samples: 165351648. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:37:53,979][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:54,254][626795] Updated weights for policy 0, policy_version 202812 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:37:55,961][626795] Updated weights for policy 0, policy_version 202822 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:00,904][24592] Fps is (10 sec: 33650.8, 60 sec: 43256.0, 300 sec: 42873.5). Total num frames: 1661583360. Throughput: 0: 10794.2. Samples: 165387636. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:00,905][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:01,253][626795] Updated weights for policy 0, policy_version 202832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:03,095][626795] Updated weights for policy 0, policy_version 202842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:03,975][24592] Fps is (10 sec: 30310.8, 60 sec: 42734.9, 300 sec: 42792.8). Total num frames: 1661714432. Throughput: 0: 10269.5. Samples: 165416220. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:03,977][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:04,980][626795] Updated weights for policy 0, policy_version 202852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:06,789][626795] Updated weights for policy 0, policy_version 202862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:08,495][626795] Updated weights for policy 0, policy_version 202872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:08,975][24592] Fps is (10 sec: 44657.2, 60 sec: 42598.4, 300 sec: 42765.0). Total num frames: 1661943808. Throughput: 0: 11300.1. Samples: 165484470. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:08,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:10,254][626795] Updated weights for policy 0, policy_version 202882 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:11,953][626795] Updated weights for policy 0, policy_version 202892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:13,541][626795] Updated weights for policy 0, policy_version 202902 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:13,976][24592] Fps is (10 sec: 46692.9, 60 sec: 42461.6, 300 sec: 43292.6). Total num frames: 1662181376. Throughput: 0: 10937.0. Samples: 165519900. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:13,978][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:15,309][626795] Updated weights for policy 0, policy_version 202912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:17,011][626795] Updated weights for policy 0, policy_version 202922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:18,708][626795] Updated weights for policy 0, policy_version 202932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:18,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42461.9, 300 sec: 43376.0). Total num frames: 1662427136. Throughput: 0: 10979.3. Samples: 165592896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:18,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:20,373][626795] Updated weights for policy 0, policy_version 202942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:22,065][626795] Updated weights for policy 0, policy_version 202952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:23,800][626795] Updated weights for policy 0, policy_version 202962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:23,975][24592] Fps is (10 sec: 48334.4, 60 sec: 44729.1, 300 sec: 43403.7). Total num frames: 1662664704. Throughput: 0: 10959.9. Samples: 165664902. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:23,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:25,422][626795] Updated weights for policy 0, policy_version 202972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:27,154][626795] Updated weights for policy 0, policy_version 202982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:28,876][626795] Updated weights for policy 0, policy_version 202992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:28,975][24592] Fps is (10 sec: 49152.1, 60 sec: 45056.0, 300 sec: 43431.5). Total num frames: 1662918656. Throughput: 0: 10974.7. Samples: 165701664. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:28,976][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:30,546][626795] Updated weights for policy 0, policy_version 203002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:32,252][626795] Updated weights for policy 0, policy_version 203012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:36,154][24592] Fps is (10 sec: 36323.9, 60 sec: 42687.2, 300 sec: 42947.7). Total num frames: 1663107072. Throughput: 0: 10466.0. Samples: 165773772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:36,157][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:37,023][626795] Updated weights for policy 0, policy_version 203022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:38,855][626795] Updated weights for policy 0, policy_version 203032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:38,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1663246336. Throughput: 0: 10149.4. Samples: 165808368. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:38,976][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:40,639][626795] Updated weights for policy 0, policy_version 203042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:42,249][626795] Updated weights for policy 0, policy_version 203052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:43,907][626795] Updated weights for policy 0, policy_version 203062 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:43,975][24592] Fps is (10 sec: 48178.7, 60 sec: 42461.9, 300 sec: 42903.9). Total num frames: 1663483904. Throughput: 0: 10578.6. Samples: 165843270. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:43,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:45,633][626795] Updated weights for policy 0, policy_version 203072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:47,321][626795] Updated weights for policy 0, policy_version 203082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:48,969][626795] Updated weights for policy 0, policy_version 203092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:48,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42461.9, 300 sec: 43431.7). Total num frames: 1663729664. Throughput: 0: 11114.4. Samples: 165916368. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:48,976][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:50,727][626795] Updated weights for policy 0, policy_version 203102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:52,373][626795] Updated weights for policy 0, policy_version 203112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:53,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42598.5, 300 sec: 43459.3). Total num frames: 1663967232. Throughput: 0: 11217.7. Samples: 165989268. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:53,978][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:54,084][626795] Updated weights for policy 0, policy_version 203122 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:55,795][626795] Updated weights for policy 0, policy_version 203132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:57,542][626795] Updated weights for policy 0, policy_version 203142 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:58,976][24592] Fps is (10 sec: 47512.5, 60 sec: 45141.5, 300 sec: 43487.0). Total num frames: 1664204800. Throughput: 0: 11227.6. Samples: 166025142. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:38:58,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:38:59,254][626795] Updated weights for policy 0, policy_version 203152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:00,905][626795] Updated weights for policy 0, policy_version 203162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:02,634][626795] Updated weights for policy 0, policy_version 203172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:03,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45602.2, 300 sec: 43487.0). Total num frames: 1664450560. Throughput: 0: 11217.9. Samples: 166097700. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:03,976][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000203180_1664450560.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:04,033][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000201895_1653923840.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:04,326][626795] Updated weights for policy 0, policy_version 203182 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:06,022][626795] Updated weights for policy 0, policy_version 203192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:07,764][626795] Updated weights for policy 0, policy_version 203202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:11,661][24592] Fps is (10 sec: 35519.0, 60 sec: 43256.7, 300 sec: 42984.7). Total num frames: 1664655360. Throughput: 0: 9825.8. Samples: 166133448. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:11,662][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:12,836][626795] Updated weights for policy 0, policy_version 203212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:13,975][24592] Fps is (10 sec: 30310.0, 60 sec: 42871.6, 300 sec: 42903.9). Total num frames: 1664753664. Throughput: 0: 10325.4. Samples: 166166310. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:13,977][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:14,817][626795] Updated weights for policy 0, policy_version 203222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:16,422][626795] Updated weights for policy 0, policy_version 203232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:18,122][626795] Updated weights for policy 0, policy_version 203242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:18,975][24592] Fps is (10 sec: 47037.1, 60 sec: 42871.5, 300 sec: 42959.4). Total num frames: 1664999424. Throughput: 0: 10758.1. Samples: 166234452. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:18,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:19,844][626795] Updated weights for policy 0, policy_version 203252 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:21,566][626795] Updated weights for policy 0, policy_version 203262 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:23,208][626795] Updated weights for policy 0, policy_version 203272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:23,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42871.4, 300 sec: 43459.3). Total num frames: 1665236992. Throughput: 0: 11071.6. Samples: 166306590. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:23,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:24,975][626795] Updated weights for policy 0, policy_version 203282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:26,746][626795] Updated weights for policy 0, policy_version 203292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:28,328][626795] Updated weights for policy 0, policy_version 203302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:28,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42734.9, 300 sec: 43542.6). Total num frames: 1665482752. Throughput: 0: 11100.3. Samples: 166342782. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:28,977][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:30,062][626795] Updated weights for policy 0, policy_version 203312 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:31,776][626795] Updated weights for policy 0, policy_version 203322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:33,437][626795] Updated weights for policy 0, policy_version 203332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:33,975][24592] Fps is (10 sec: 48332.8, 60 sec: 45195.0, 300 sec: 43544.5). Total num frames: 1665720320. Throughput: 0: 11082.2. Samples: 166415070. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:33,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:35,073][626795] Updated weights for policy 0, policy_version 203342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:36,839][626795] Updated weights for policy 0, policy_version 203352 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:38,530][626795] Updated weights for policy 0, policy_version 203362 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:38,975][24592] Fps is (10 sec: 47513.2, 60 sec: 45192.5, 300 sec: 43514.8). Total num frames: 1665957888. Throughput: 0: 11077.6. Samples: 166487760. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:38,976][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:40,184][626795] Updated weights for policy 0, policy_version 203372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:41,940][626795] Updated weights for policy 0, policy_version 203382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:43,738][626795] Updated weights for policy 0, policy_version 203392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:47,103][24592] Fps is (10 sec: 35569.4, 60 sec: 42823.6, 300 sec: 43030.8). Total num frames: 1666187264. Throughput: 0: 10356.8. Samples: 166523592. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:47,107][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:48,818][626795] Updated weights for policy 0, policy_version 203402 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:48,976][24592] Fps is (10 sec: 31948.1, 60 sec: 42461.7, 300 sec: 43014.9). Total num frames: 1666277376. Throughput: 0: 10169.4. Samples: 166555326. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:48,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:50,569][626795] Updated weights for policy 0, policy_version 203412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:52,221][626795] Updated weights for policy 0, policy_version 203422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:53,975][24592] Fps is (10 sec: 46489.8, 60 sec: 42325.3, 300 sec: 42959.4). Total num frames: 1666506752. Throughput: 0: 11635.4. Samples: 166625796. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:53,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:53,983][626795] Updated weights for policy 0, policy_version 203432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:55,719][626795] Updated weights for policy 0, policy_version 203442 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:57,368][626795] Updated weights for policy 0, policy_version 203452 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:58,975][24592] Fps is (10 sec: 47514.8, 60 sec: 42462.0, 300 sec: 43487.0). Total num frames: 1666752512. Throughput: 0: 11000.8. Samples: 166661346. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:39:58,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:39:59,058][626795] Updated weights for policy 0, policy_version 203462 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:00,791][626795] Updated weights for policy 0, policy_version 203472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:02,583][626795] Updated weights for policy 0, policy_version 203482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:03,976][24592] Fps is (10 sec: 48330.4, 60 sec: 42325.0, 300 sec: 43542.5). Total num frames: 1666990080. Throughput: 0: 11078.1. Samples: 166732974. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:03,978][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:04,341][626795] Updated weights for policy 0, policy_version 203492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:05,926][626795] Updated weights for policy 0, policy_version 203502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:07,706][626795] Updated weights for policy 0, policy_version 203512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:08,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44880.0, 300 sec: 43542.5). Total num frames: 1667227648. Throughput: 0: 11080.8. Samples: 166805226. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:08,976][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:09,376][626795] Updated weights for policy 0, policy_version 203522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:11,116][626795] Updated weights for policy 0, policy_version 203532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:12,658][626795] Updated weights for policy 0, policy_version 203542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:13,975][24592] Fps is (10 sec: 48335.2, 60 sec: 45329.1, 300 sec: 43570.4). Total num frames: 1667473408. Throughput: 0: 11078.7. Samples: 166841322. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:13,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:14,416][626795] Updated weights for policy 0, policy_version 203552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:16,094][626795] Updated weights for policy 0, policy_version 203562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:17,859][626795] Updated weights for policy 0, policy_version 203572 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:18,978][24592] Fps is (10 sec: 47501.9, 60 sec: 45054.1, 300 sec: 43514.5). Total num frames: 1667702784. Throughput: 0: 11090.6. Samples: 166914174. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:18,979][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:22,938][626795] Updated weights for policy 0, policy_version 203582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:23,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42461.9, 300 sec: 42987.2). Total num frames: 1667784704. Throughput: 0: 10154.5. Samples: 166944714. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:23,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:24,811][626795] Updated weights for policy 0, policy_version 203592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:26,558][626795] Updated weights for policy 0, policy_version 203602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:28,257][626795] Updated weights for policy 0, policy_version 203612 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:28,976][24592] Fps is (10 sec: 31956.1, 60 sec: 42325.1, 300 sec: 42959.4). Total num frames: 1668022272. Throughput: 0: 10872.3. Samples: 166978842. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:28,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:29,843][626795] Updated weights for policy 0, policy_version 203622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:31,614][626795] Updated weights for policy 0, policy_version 203632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:33,308][626795] Updated weights for policy 0, policy_version 203642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:33,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42461.9, 300 sec: 43499.2). Total num frames: 1668268032. Throughput: 0: 11028.6. Samples: 167051610. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:33,976][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:35,021][626795] Updated weights for policy 0, policy_version 203652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:36,646][626795] Updated weights for policy 0, policy_version 203662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:38,383][626795] Updated weights for policy 0, policy_version 203672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:38,975][24592] Fps is (10 sec: 48334.2, 60 sec: 42461.9, 300 sec: 43542.6). Total num frames: 1668505600. Throughput: 0: 11078.8. Samples: 167124342. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:38,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:40,121][626795] Updated weights for policy 0, policy_version 203682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:41,811][626795] Updated weights for policy 0, policy_version 203692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:43,510][626795] Updated weights for policy 0, policy_version 203702 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:43,976][24592] Fps is (10 sec: 47509.6, 60 sec: 44940.5, 300 sec: 43598.0). Total num frames: 1668743168. Throughput: 0: 11077.9. Samples: 167159862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:43,978][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:45,136][626795] Updated weights for policy 0, policy_version 203712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:46,875][626795] Updated weights for policy 0, policy_version 203722 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:48,621][626795] Updated weights for policy 0, policy_version 203732 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:48,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45056.2, 300 sec: 43570.4). Total num frames: 1668980736. Throughput: 0: 11088.1. Samples: 167231934. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:48,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:50,443][626795] Updated weights for policy 0, policy_version 203742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:52,096][626795] Updated weights for policy 0, policy_version 203752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:53,935][626795] Updated weights for policy 0, policy_version 203762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:53,975][24592] Fps is (10 sec: 47517.5, 60 sec: 45192.5, 300 sec: 43571.2). Total num frames: 1669218304. Throughput: 0: 11042.4. Samples: 167302134. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:53,976][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:58,976][24592] Fps is (10 sec: 31128.0, 60 sec: 42325.0, 300 sec: 43014.9). Total num frames: 1669292032. Throughput: 0: 10548.5. Samples: 167316012. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:40:58,984][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:40:59,003][626795] Updated weights for policy 0, policy_version 203772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:00,810][626795] Updated weights for policy 0, policy_version 203782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:02,535][626795] Updated weights for policy 0, policy_version 203792 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:03,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42325.7, 300 sec: 42987.2). Total num frames: 1669529600. Throughput: 0: 10087.9. Samples: 167368104. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:03,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000203800_1669529600.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:04,055][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000202533_1659150336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:04,285][626795] Updated weights for policy 0, policy_version 203802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:05,932][626795] Updated weights for policy 0, policy_version 203812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:07,916][626795] Updated weights for policy 0, policy_version 203822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:08,976][24592] Fps is (10 sec: 45875.8, 60 sec: 42052.1, 300 sec: 43419.4). Total num frames: 1669750784. Throughput: 0: 10924.4. Samples: 167436318. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:08,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:09,716][626795] Updated weights for policy 0, policy_version 203832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:11,435][626795] Updated weights for policy 0, policy_version 203842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:13,060][626795] Updated weights for policy 0, policy_version 203852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42052.3, 300 sec: 43431.5). Total num frames: 1669996544. Throughput: 0: 10956.1. Samples: 167471862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:13,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:14,905][626795] Updated weights for policy 0, policy_version 203862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:16,508][626795] Updated weights for policy 0, policy_version 203872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:18,217][626795] Updated weights for policy 0, policy_version 203882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:18,975][24592] Fps is (10 sec: 48334.2, 60 sec: 42190.5, 300 sec: 43431.5). Total num frames: 1670234112. Throughput: 0: 10945.7. Samples: 167544168. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:18,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:19,967][626795] Updated weights for policy 0, policy_version 203892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:21,531][626795] Updated weights for policy 0, policy_version 203902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:23,312][626795] Updated weights for policy 0, policy_version 203912 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:23,975][24592] Fps is (10 sec: 47513.4, 60 sec: 44782.9, 300 sec: 43431.5). Total num frames: 1670471680. Throughput: 0: 10930.8. Samples: 167616228. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:23,977][24592] Avg episode reward: [(0, '4.917')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:25,080][626795] Updated weights for policy 0, policy_version 203922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:26,768][626795] Updated weights for policy 0, policy_version 203932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:28,450][626795] Updated weights for policy 0, policy_version 203942 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:28,975][24592] Fps is (10 sec: 47514.1, 60 sec: 44783.2, 300 sec: 43431.5). Total num frames: 1670709248. Throughput: 0: 10941.4. Samples: 167652216. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:28,977][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:30,361][626795] Updated weights for policy 0, policy_version 203952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:33,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42052.2, 300 sec: 42876.1). Total num frames: 1670791168. Throughput: 0: 10308.5. Samples: 167695818. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:33,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:35,365][626795] Updated weights for policy 0, policy_version 203962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:37,286][626795] Updated weights for policy 0, policy_version 203972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:38,897][626795] Updated weights for policy 0, policy_version 203982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:38,975][24592] Fps is (10 sec: 31129.5, 60 sec: 41915.7, 300 sec: 42848.3). Total num frames: 1671020544. Throughput: 0: 10003.7. Samples: 167752302. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:38,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:40,675][626795] Updated weights for policy 0, policy_version 203992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:42,339][626795] Updated weights for policy 0, policy_version 204002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:43,976][24592] Fps is (10 sec: 46692.6, 60 sec: 41916.0, 300 sec: 43376.5). Total num frames: 1671258112. Throughput: 0: 10504.8. Samples: 167788728. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:43,978][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:44,072][626795] Updated weights for policy 0, policy_version 204012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:45,670][626795] Updated weights for policy 0, policy_version 204022 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:47,507][626795] Updated weights for policy 0, policy_version 204032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:48,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42052.3, 300 sec: 43459.2). Total num frames: 1671503872. Throughput: 0: 10950.8. Samples: 167860890. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:48,977][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:49,190][626795] Updated weights for policy 0, policy_version 204042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:50,730][626795] Updated weights for policy 0, policy_version 204052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:52,458][626795] Updated weights for policy 0, policy_version 204062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:53,975][24592] Fps is (10 sec: 48334.9, 60 sec: 42052.3, 300 sec: 43514.8). Total num frames: 1671741440. Throughput: 0: 11036.4. Samples: 167932950. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:53,977][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:54,225][626795] Updated weights for policy 0, policy_version 204072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:56,001][626795] Updated weights for policy 0, policy_version 204082 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:57,650][626795] Updated weights for policy 0, policy_version 204092 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:58,975][24592] Fps is (10 sec: 47513.8, 60 sec: 44783.3, 300 sec: 43487.0). Total num frames: 1671979008. Throughput: 0: 11050.8. Samples: 167969148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:41:58,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:41:59,331][626795] Updated weights for policy 0, policy_version 204102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:01,073][626795] Updated weights for policy 0, policy_version 204112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:02,761][626795] Updated weights for policy 0, policy_version 204122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:03,975][24592] Fps is (10 sec: 48332.4, 60 sec: 44919.4, 300 sec: 43514.8). Total num frames: 1672224768. Throughput: 0: 11056.9. Samples: 168041730. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:03,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:04,444][626795] Updated weights for policy 0, policy_version 204132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:09,047][24592] Fps is (10 sec: 32536.3, 60 sec: 42548.2, 300 sec: 42949.0). Total num frames: 1672306688. Throughput: 0: 10208.7. Samples: 168076344. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:09,048][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:09,449][626795] Updated weights for policy 0, policy_version 204142 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:11,329][626795] Updated weights for policy 0, policy_version 204152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:12,998][626795] Updated weights for policy 0, policy_version 204162 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:13,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42325.3, 300 sec: 42903.9). Total num frames: 1672536064. Throughput: 0: 10140.5. Samples: 168108540. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:13,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:14,657][626795] Updated weights for policy 0, policy_version 204172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:15,144][626772] Signal inference workers to stop experience collection... (2300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:15,144][626772] Signal inference workers to resume experience collection... (2300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:15,154][626795] InferenceWorker_p0-w0: stopping experience collection (2300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:15,160][626795] InferenceWorker_p0-w0: resuming experience collection (2300 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:16,394][626795] Updated weights for policy 0, policy_version 204182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:18,110][626795] Updated weights for policy 0, policy_version 204192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:18,976][24592] Fps is (10 sec: 47024.8, 60 sec: 42324.7, 300 sec: 43359.2). Total num frames: 1672773632. Throughput: 0: 10765.0. Samples: 168180252. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:18,978][24592] Avg episode reward: [(0, '5.075')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:19,829][626795] Updated weights for policy 0, policy_version 204202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:21,522][626795] Updated weights for policy 0, policy_version 204212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:23,192][626795] Updated weights for policy 0, policy_version 204222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:23,976][24592] Fps is (10 sec: 48330.5, 60 sec: 42461.5, 300 sec: 43403.6). Total num frames: 1673019392. Throughput: 0: 11114.1. Samples: 168252444. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:23,978][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:24,887][626795] Updated weights for policy 0, policy_version 204232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:26,653][626795] Updated weights for policy 0, policy_version 204242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:28,356][626795] Updated weights for policy 0, policy_version 204252 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:28,975][24592] Fps is (10 sec: 48337.4, 60 sec: 42461.9, 300 sec: 43403.8). Total num frames: 1673256960. Throughput: 0: 11117.7. Samples: 168289020. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:28,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:30,025][626795] Updated weights for policy 0, policy_version 204262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:31,667][626795] Updated weights for policy 0, policy_version 204272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:33,415][626795] Updated weights for policy 0, policy_version 204282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:33,976][24592] Fps is (10 sec: 48334.6, 60 sec: 45192.4, 300 sec: 43403.7). Total num frames: 1673502720. Throughput: 0: 11115.3. Samples: 168361080. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:33,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:35,066][626795] Updated weights for policy 0, policy_version 204292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:36,799][626795] Updated weights for policy 0, policy_version 204302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:38,445][626795] Updated weights for policy 0, policy_version 204312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:38,975][24592] Fps is (10 sec: 49151.9, 60 sec: 45465.6, 300 sec: 43431.5). Total num frames: 1673748480. Throughput: 0: 11131.5. Samples: 168433866. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:38,976][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:40,274][626795] Updated weights for policy 0, policy_version 204322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:44,557][24592] Fps is (10 sec: 32515.3, 60 sec: 42730.5, 300 sec: 42847.1). Total num frames: 1673846784. Throughput: 0: 10195.2. Samples: 168433866. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:44,558][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:45,402][626795] Updated weights for policy 0, policy_version 204332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:47,332][626795] Updated weights for policy 0, policy_version 204342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:48,976][24592] Fps is (10 sec: 29489.9, 60 sec: 42325.0, 300 sec: 42820.5). Total num frames: 1674043392. Throughput: 0: 10125.6. Samples: 168497388. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:48,978][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:49,010][626795] Updated weights for policy 0, policy_version 204352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:50,851][626795] Updated weights for policy 0, policy_version 204362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:52,582][626795] Updated weights for policy 0, policy_version 204372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:53,975][24592] Fps is (10 sec: 46099.7, 60 sec: 42325.3, 300 sec: 43326.0). Total num frames: 1674280960. Throughput: 0: 10928.5. Samples: 168567348. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:53,976][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:54,447][626795] Updated weights for policy 0, policy_version 204382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:56,028][626795] Updated weights for policy 0, policy_version 204392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:57,731][626795] Updated weights for policy 0, policy_version 204402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:58,975][24592] Fps is (10 sec: 46696.5, 60 sec: 42188.8, 300 sec: 43375.9). Total num frames: 1674510336. Throughput: 0: 10966.6. Samples: 168602034. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:42:58,976][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:42:59,529][626795] Updated weights for policy 0, policy_version 204412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:01,269][626795] Updated weights for policy 0, policy_version 204422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:03,059][626795] Updated weights for policy 0, policy_version 204432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:03,975][24592] Fps is (10 sec: 47513.2, 60 sec: 42188.8, 300 sec: 43431.5). Total num frames: 1674756096. Throughput: 0: 10958.9. Samples: 168673392. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:03,978][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000204438_1674756096.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000203180_1664450560.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:04,777][626795] Updated weights for policy 0, policy_version 204442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:06,501][626795] Updated weights for policy 0, policy_version 204452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:08,294][626795] Updated weights for policy 0, policy_version 204462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:08,976][24592] Fps is (10 sec: 47511.0, 60 sec: 44699.0, 300 sec: 43403.7). Total num frames: 1674985472. Throughput: 0: 10908.5. Samples: 168743328. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:08,977][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:10,071][626795] Updated weights for policy 0, policy_version 204472 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:11,846][626795] Updated weights for policy 0, policy_version 204482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:13,537][626795] Updated weights for policy 0, policy_version 204492 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:13,976][24592] Fps is (10 sec: 45874.2, 60 sec: 44646.2, 300 sec: 43348.1). Total num frames: 1675214848. Throughput: 0: 10863.3. Samples: 168777870. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:13,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:15,322][626795] Updated weights for policy 0, policy_version 204502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:19,354][24592] Fps is (10 sec: 34732.5, 60 sec: 42603.5, 300 sec: 42932.1). Total num frames: 1675345920. Throughput: 0: 9966.6. Samples: 168813348. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:19,355][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:19,607][626795] Updated weights for policy 0, policy_version 204512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:21,368][626795] Updated weights for policy 0, policy_version 204522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:23,098][626795] Updated weights for policy 0, policy_version 204532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:23,976][24592] Fps is (10 sec: 34405.5, 60 sec: 42325.4, 300 sec: 42848.2). Total num frames: 1675558912. Throughput: 0: 10105.1. Samples: 168888600. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:23,977][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:24,807][626795] Updated weights for policy 0, policy_version 204542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:26,537][626795] Updated weights for policy 0, policy_version 204552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:28,212][626795] Updated weights for policy 0, policy_version 204562 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:28,975][24592] Fps is (10 sec: 47679.4, 60 sec: 42461.9, 300 sec: 43362.9). Total num frames: 1675804672. Throughput: 0: 11039.1. Samples: 168924204. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:28,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:29,936][626795] Updated weights for policy 0, policy_version 204572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:31,661][626795] Updated weights for policy 0, policy_version 204582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:33,391][626795] Updated weights for policy 0, policy_version 204592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:33,975][24592] Fps is (10 sec: 48335.0, 60 sec: 42325.4, 300 sec: 43375.9). Total num frames: 1676042240. Throughput: 0: 11089.6. Samples: 168996414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:33,976][24592] Avg episode reward: [(0, '4.938')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:34,997][626795] Updated weights for policy 0, policy_version 204602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:36,805][626795] Updated weights for policy 0, policy_version 204612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:38,468][626795] Updated weights for policy 0, policy_version 204622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:38,976][24592] Fps is (10 sec: 47510.2, 60 sec: 42188.3, 300 sec: 43375.8). Total num frames: 1676279808. Throughput: 0: 11140.1. Samples: 169068660. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:38,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:40,268][626795] Updated weights for policy 0, policy_version 204632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:41,805][626795] Updated weights for policy 0, policy_version 204642 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:43,617][626795] Updated weights for policy 0, policy_version 204652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:43,975][24592] Fps is (10 sec: 47514.0, 60 sec: 44945.7, 300 sec: 43348.2). Total num frames: 1676517376. Throughput: 0: 11165.2. Samples: 169104468. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:43,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:45,326][626795] Updated weights for policy 0, policy_version 204662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:47,044][626795] Updated weights for policy 0, policy_version 204672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:48,727][626795] Updated weights for policy 0, policy_version 204682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:48,977][24592] Fps is (10 sec: 48329.2, 60 sec: 45328.3, 300 sec: 43375.7). Total num frames: 1676763136. Throughput: 0: 11172.2. Samples: 169176156. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:48,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:50,474][626795] Updated weights for policy 0, policy_version 204692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:55,029][24592] Fps is (10 sec: 33350.2, 60 sec: 42668.3, 300 sec: 42834.2). Total num frames: 1676886016. Throughput: 0: 10180.3. Samples: 169212162. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:55,030][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:55,669][626795] Updated weights for policy 0, policy_version 204702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:57,345][626795] Updated weights for policy 0, policy_version 204712 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:58,975][24592] Fps is (10 sec: 31134.1, 60 sec: 42734.9, 300 sec: 42792.8). Total num frames: 1677074432. Throughput: 0: 10317.7. Samples: 169242162. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:43:58,977][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:43:59,046][626795] Updated weights for policy 0, policy_version 204722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:00,767][626795] Updated weights for policy 0, policy_version 204732 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:02,429][626795] Updated weights for policy 0, policy_version 204742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:03,975][24592] Fps is (10 sec: 48530.5, 60 sec: 42734.9, 300 sec: 43326.0). Total num frames: 1677320192. Throughput: 0: 11233.5. Samples: 169314606. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:03,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:04,138][626795] Updated weights for policy 0, policy_version 204752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:05,888][626795] Updated weights for policy 0, policy_version 204762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:07,559][626795] Updated weights for policy 0, policy_version 204772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42871.8, 300 sec: 43403.7). Total num frames: 1677557760. Throughput: 0: 11082.5. Samples: 169387308. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:08,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:09,251][626795] Updated weights for policy 0, policy_version 204782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:10,971][626795] Updated weights for policy 0, policy_version 204792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:12,651][626795] Updated weights for policy 0, policy_version 204802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:13,975][24592] Fps is (10 sec: 47514.2, 60 sec: 43008.2, 300 sec: 43375.9). Total num frames: 1677795328. Throughput: 0: 11082.4. Samples: 169422912. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:13,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:14,363][626795] Updated weights for policy 0, policy_version 204812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:16,129][626795] Updated weights for policy 0, policy_version 204822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:17,773][626795] Updated weights for policy 0, policy_version 204832 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:18,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45204.5, 300 sec: 43403.7). Total num frames: 1678041088. Throughput: 0: 11075.9. Samples: 169494828. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:18,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:19,461][626795] Updated weights for policy 0, policy_version 204842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:21,222][626795] Updated weights for policy 0, policy_version 204852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:22,856][626795] Updated weights for policy 0, policy_version 204862 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:23,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45329.5, 300 sec: 43375.9). Total num frames: 1678278656. Throughput: 0: 11079.8. Samples: 169567242. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:23,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:24,626][626795] Updated weights for policy 0, policy_version 204872 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:26,276][626795] Updated weights for policy 0, policy_version 204882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:30,855][24592] Fps is (10 sec: 33100.9, 60 sec: 42496.1, 300 sec: 42825.4). Total num frames: 1678434304. Throughput: 0: 10639.4. Samples: 169603236. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:30,856][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:31,574][626795] Updated weights for policy 0, policy_version 204892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:33,249][626795] Updated weights for policy 0, policy_version 204902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:33,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42461.9, 300 sec: 42820.6). Total num frames: 1678589952. Throughput: 0: 10134.1. Samples: 169632174. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:33,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:35,063][626795] Updated weights for policy 0, policy_version 204912 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:36,743][626795] Updated weights for policy 0, policy_version 204922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:38,425][626795] Updated weights for policy 0, policy_version 204932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:38,976][24592] Fps is (10 sec: 48420.0, 60 sec: 42462.1, 300 sec: 43307.5). Total num frames: 1678827520. Throughput: 0: 11200.4. Samples: 169704384. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:38,976][24592] Avg episode reward: [(0, '4.910')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:40,215][626795] Updated weights for policy 0, policy_version 204942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:41,828][626795] Updated weights for policy 0, policy_version 204952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:43,563][626795] Updated weights for policy 0, policy_version 204962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42461.9, 300 sec: 43348.2). Total num frames: 1679065088. Throughput: 0: 11059.3. Samples: 169739832. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:43,976][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:45,306][626795] Updated weights for policy 0, policy_version 204972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:47,035][626795] Updated weights for policy 0, policy_version 204982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:48,751][626795] Updated weights for policy 0, policy_version 204992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:48,976][24592] Fps is (10 sec: 48332.6, 60 sec: 42462.6, 300 sec: 43403.7). Total num frames: 1679310848. Throughput: 0: 11043.1. Samples: 169811550. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:48,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:50,379][626795] Updated weights for policy 0, policy_version 205002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:52,136][626795] Updated weights for policy 0, policy_version 205012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:53,889][626795] Updated weights for policy 0, policy_version 205022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:53,976][24592] Fps is (10 sec: 47512.9, 60 sec: 45027.4, 300 sec: 43348.2). Total num frames: 1679540224. Throughput: 0: 11014.6. Samples: 169882968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:53,977][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:55,567][626795] Updated weights for policy 0, policy_version 205032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:57,237][626795] Updated weights for policy 0, policy_version 205042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:44:58,970][626795] Updated weights for policy 0, policy_version 205052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:58,975][24592] Fps is (10 sec: 47515.2, 60 sec: 45192.5, 300 sec: 43376.0). Total num frames: 1679785984. Throughput: 0: 11034.5. Samples: 169919466. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:44:58,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:00,642][626795] Updated weights for policy 0, policy_version 205062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:02,534][626795] Updated weights for policy 0, policy_version 205072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:06,472][24592] Fps is (10 sec: 34087.5, 60 sec: 42338.4, 300 sec: 42819.1). Total num frames: 1679966208. Throughput: 0: 10401.5. Samples: 169988868. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:06,476][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:06,507][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000205075_1679974400.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:06,558][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000203800_1669529600.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:07,863][626795] Updated weights for policy 0, policy_version 205082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:08,975][24592] Fps is (10 sec: 29491.3, 60 sec: 42052.3, 300 sec: 42737.3). Total num frames: 1680080896. Throughput: 0: 10005.9. Samples: 170017506. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:08,978][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:09,668][626795] Updated weights for policy 0, policy_version 205092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:11,357][626795] Updated weights for policy 0, policy_version 205102 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:13,127][626795] Updated weights for policy 0, policy_version 205112 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:13,975][24592] Fps is (10 sec: 46948.6, 60 sec: 42052.2, 300 sec: 42765.4). Total num frames: 1680318464. Throughput: 0: 10425.7. Samples: 170052798. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:13,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:14,857][626795] Updated weights for policy 0, policy_version 205122 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:16,490][626795] Updated weights for policy 0, policy_version 205132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:18,218][626795] Updated weights for policy 0, policy_version 205142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:18,976][24592] Fps is (10 sec: 47511.9, 60 sec: 41915.5, 300 sec: 43292.6). Total num frames: 1680556032. Throughput: 0: 10936.2. Samples: 170124306. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:18,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:19,946][626795] Updated weights for policy 0, policy_version 205152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:21,677][626795] Updated weights for policy 0, policy_version 205162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:23,319][626795] Updated weights for policy 0, policy_version 205172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:23,975][24592] Fps is (10 sec: 47513.1, 60 sec: 41915.7, 300 sec: 43292.7). Total num frames: 1680793600. Throughput: 0: 10924.2. Samples: 170195970. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:23,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:25,097][626795] Updated weights for policy 0, policy_version 205182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:26,803][626795] Updated weights for policy 0, policy_version 205192 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:28,522][626795] Updated weights for policy 0, policy_version 205202 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:28,976][24592] Fps is (10 sec: 48331.9, 60 sec: 44821.1, 300 sec: 43292.6). Total num frames: 1681039360. Throughput: 0: 10945.3. Samples: 170232378. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:28,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:30,166][626795] Updated weights for policy 0, policy_version 205212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:31,893][626795] Updated weights for policy 0, policy_version 205222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:33,599][626795] Updated weights for policy 0, policy_version 205232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:33,976][24592] Fps is (10 sec: 48332.2, 60 sec: 44782.8, 300 sec: 43292.6). Total num frames: 1681276928. Throughput: 0: 10950.2. Samples: 170304306. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:33,977][24592] Avg episode reward: [(0, '4.972')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:35,426][626795] Updated weights for policy 0, policy_version 205242 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:37,106][626795] Updated weights for policy 0, policy_version 205252 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:41,927][24592] Fps is (10 sec: 35422.9, 60 sec: 42423.4, 300 sec: 42809.0). Total num frames: 1681498112. Throughput: 0: 10276.7. Samples: 170375748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:41,929][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:42,051][626795] Updated weights for policy 0, policy_version 205262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:43,756][626795] Updated weights for policy 0, policy_version 205272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:43,978][24592] Fps is (10 sec: 31939.6, 60 sec: 42186.7, 300 sec: 42764.6). Total num frames: 1681596416. Throughput: 0: 10165.0. Samples: 170376924. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:43,981][24592] Avg episode reward: [(0, '4.323')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:45,567][626795] Updated weights for policy 0, policy_version 205282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:47,222][626795] Updated weights for policy 0, policy_version 205292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:48,947][626795] Updated weights for policy 0, policy_version 205302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:48,976][24592] Fps is (10 sec: 47648.0, 60 sec: 42052.3, 300 sec: 42765.0). Total num frames: 1681833984. Throughput: 0: 10695.3. Samples: 170443452. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:48,978][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:50,628][626795] Updated weights for policy 0, policy_version 205312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:52,419][626795] Updated weights for policy 0, policy_version 205322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:53,975][24592] Fps is (10 sec: 47528.1, 60 sec: 42188.9, 300 sec: 43320.5). Total num frames: 1682071552. Throughput: 0: 11055.6. Samples: 170515008. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:53,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:54,124][626795] Updated weights for policy 0, policy_version 205332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:55,829][626795] Updated weights for policy 0, policy_version 205342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:57,609][626795] Updated weights for policy 0, policy_version 205352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:58,975][24592] Fps is (10 sec: 47515.0, 60 sec: 42052.3, 300 sec: 43320.4). Total num frames: 1682309120. Throughput: 0: 11053.6. Samples: 170550210. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:45:58,976][24592] Avg episode reward: [(0, '4.907')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:45:59,341][626795] Updated weights for policy 0, policy_version 205362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:00,990][626795] Updated weights for policy 0, policy_version 205372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:02,756][626795] Updated weights for policy 0, policy_version 205382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:03,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44875.5, 300 sec: 43376.0). Total num frames: 1682546688. Throughput: 0: 11061.9. Samples: 170622090. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:03,976][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:04,497][626795] Updated weights for policy 0, policy_version 205392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:06,140][626795] Updated weights for policy 0, policy_version 205402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:07,817][626795] Updated weights for policy 0, policy_version 205412 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45056.0, 300 sec: 43348.2). Total num frames: 1682784256. Throughput: 0: 11061.8. Samples: 170693748. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:08,978][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:09,596][626795] Updated weights for policy 0, policy_version 205422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:11,330][626795] Updated weights for policy 0, policy_version 205432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:13,021][626795] Updated weights for policy 0, policy_version 205442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:13,975][24592] Fps is (10 sec: 47514.0, 60 sec: 45056.0, 300 sec: 43348.2). Total num frames: 1683021824. Throughput: 0: 11049.7. Samples: 170729610. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:13,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:18,223][626795] Updated weights for policy 0, policy_version 205452 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:18,976][24592] Fps is (10 sec: 31128.5, 60 sec: 42325.3, 300 sec: 42792.7). Total num frames: 1683095552. Throughput: 0: 10137.8. Samples: 170760510. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:18,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:19,960][626795] Updated weights for policy 0, policy_version 205462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:21,649][626795] Updated weights for policy 0, policy_version 205472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:23,463][626795] Updated weights for policy 0, policy_version 205482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:23,975][24592] Fps is (10 sec: 31129.3, 60 sec: 42325.3, 300 sec: 42792.8). Total num frames: 1683333120. Throughput: 0: 10813.6. Samples: 170830446. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:23,976][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:25,187][626795] Updated weights for policy 0, policy_version 205492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:26,899][626795] Updated weights for policy 0, policy_version 205502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:28,524][626795] Updated weights for policy 0, policy_version 205512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:28,975][24592] Fps is (10 sec: 47515.2, 60 sec: 42189.2, 300 sec: 43320.4). Total num frames: 1683570688. Throughput: 0: 10872.3. Samples: 170866146. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:28,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:30,313][626795] Updated weights for policy 0, policy_version 205522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:31,901][626795] Updated weights for policy 0, policy_version 205532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:33,673][626795] Updated weights for policy 0, policy_version 205542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:33,977][24592] Fps is (10 sec: 47507.0, 60 sec: 42187.9, 300 sec: 43348.0). Total num frames: 1683808256. Throughput: 0: 10994.1. Samples: 170938200. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:33,978][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:35,369][626795] Updated weights for policy 0, policy_version 205552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:37,162][626795] Updated weights for policy 0, policy_version 205562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:38,814][626795] Updated weights for policy 0, policy_version 205572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:38,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44658.5, 300 sec: 43348.2). Total num frames: 1684045824. Throughput: 0: 11000.1. Samples: 171010014. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:38,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:40,473][626795] Updated weights for policy 0, policy_version 205582 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:42,186][626795] Updated weights for policy 0, policy_version 205592 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:43,968][626795] Updated weights for policy 0, policy_version 205602 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:43,975][24592] Fps is (10 sec: 48340.0, 60 sec: 44921.8, 300 sec: 43348.2). Total num frames: 1684291584. Throughput: 0: 11008.1. Samples: 171045576. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:43,976][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:45,655][626795] Updated weights for policy 0, policy_version 205612 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:47,394][626795] Updated weights for policy 0, policy_version 205622 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:48,976][24592] Fps is (10 sec: 48330.7, 60 sec: 44919.3, 300 sec: 43348.1). Total num frames: 1684529152. Throughput: 0: 11010.6. Samples: 171117570. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:48,977][24592] Avg episode reward: [(0, '4.970')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:49,087][626795] Updated weights for policy 0, policy_version 205632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:53,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42188.8, 300 sec: 42792.8). Total num frames: 1684602880. Throughput: 0: 10122.9. Samples: 171149280. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:53,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:54,193][626795] Updated weights for policy 0, policy_version 205642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:55,982][626795] Updated weights for policy 0, policy_version 205652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:57,694][626795] Updated weights for policy 0, policy_version 205662 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:58,975][24592] Fps is (10 sec: 31130.8, 60 sec: 42188.8, 300 sec: 42765.0). Total num frames: 1684840448. Throughput: 0: 10108.0. Samples: 171184470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:46:58,976][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:46:59,374][626795] Updated weights for policy 0, policy_version 205672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:01,060][626795] Updated weights for policy 0, policy_version 205682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:02,768][626795] Updated weights for policy 0, policy_version 205692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:03,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42188.9, 300 sec: 43303.1). Total num frames: 1685078016. Throughput: 0: 11020.8. Samples: 171256440. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:03,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000205699_1685086208.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:04,036][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000204438_1674756096.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:04,555][626795] Updated weights for policy 0, policy_version 205702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:06,215][626795] Updated weights for policy 0, policy_version 205712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:07,948][626795] Updated weights for policy 0, policy_version 205722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:08,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42325.3, 300 sec: 43348.2). Total num frames: 1685323776. Throughput: 0: 11052.2. Samples: 171327792. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:08,977][24592] Avg episode reward: [(0, '4.990')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:09,636][626795] Updated weights for policy 0, policy_version 205732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:11,340][626795] Updated weights for policy 0, policy_version 205742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:13,019][626795] Updated weights for policy 0, policy_version 205752 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:13,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42325.3, 300 sec: 43348.3). Total num frames: 1685561344. Throughput: 0: 11061.5. Samples: 171363912. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:13,978][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:14,741][626795] Updated weights for policy 0, policy_version 205762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:16,495][626795] Updated weights for policy 0, policy_version 205772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:18,058][626795] Updated weights for policy 0, policy_version 205782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:18,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45056.2, 300 sec: 43320.5). Total num frames: 1685798912. Throughput: 0: 11072.2. Samples: 171436434. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:18,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:19,860][626795] Updated weights for policy 0, policy_version 205792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:21,449][626772] Signal inference workers to stop experience collection... (2350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:21,449][626772] Signal inference workers to resume experience collection... (2350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:21,460][626795] InferenceWorker_p0-w0: stopping experience collection (2350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:21,466][626795] InferenceWorker_p0-w0: resuming experience collection (2350 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:21,489][626795] Updated weights for policy 0, policy_version 205802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:23,295][626795] Updated weights for policy 0, policy_version 205812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:23,976][24592] Fps is (10 sec: 47512.6, 60 sec: 45055.9, 300 sec: 43320.4). Total num frames: 1686036480. Throughput: 0: 11067.2. Samples: 171508038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:23,984][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:25,080][626795] Updated weights for policy 0, policy_version 205822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:28,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42461.8, 300 sec: 42765.0). Total num frames: 1686118400. Throughput: 0: 10710.9. Samples: 171527568. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:28,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:30,180][626795] Updated weights for policy 0, policy_version 205832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:31,831][626795] Updated weights for policy 0, policy_version 205842 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:33,637][626795] Updated weights for policy 0, policy_version 205852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:33,976][24592] Fps is (10 sec: 31127.7, 60 sec: 42325.8, 300 sec: 42709.4). Total num frames: 1686347776. Throughput: 0: 10139.5. Samples: 171573852. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:33,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:35,298][626795] Updated weights for policy 0, policy_version 205862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:36,997][626795] Updated weights for policy 0, policy_version 205872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:38,740][626795] Updated weights for policy 0, policy_version 205882 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:38,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42461.8, 300 sec: 43294.7). Total num frames: 1686593536. Throughput: 0: 11040.9. Samples: 171646122. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:38,978][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:40,394][626795] Updated weights for policy 0, policy_version 205892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:42,226][626795] Updated weights for policy 0, policy_version 205902 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:43,831][626795] Updated weights for policy 0, policy_version 205912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:43,975][24592] Fps is (10 sec: 48336.4, 60 sec: 42325.3, 300 sec: 43348.2). Total num frames: 1686831104. Throughput: 0: 11047.7. Samples: 171681618. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:43,979][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:45,526][626795] Updated weights for policy 0, policy_version 205922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:47,335][626795] Updated weights for policy 0, policy_version 205932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:48,975][24592] Fps is (10 sec: 47514.1, 60 sec: 42325.6, 300 sec: 43348.2). Total num frames: 1687068672. Throughput: 0: 11053.9. Samples: 171753864. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:48,976][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:48,995][626795] Updated weights for policy 0, policy_version 205942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:50,719][626795] Updated weights for policy 0, policy_version 205952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:52,441][626795] Updated weights for policy 0, policy_version 205962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:53,976][24592] Fps is (10 sec: 48332.3, 60 sec: 45192.4, 300 sec: 43403.7). Total num frames: 1687314432. Throughput: 0: 11066.1. Samples: 171825768. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:53,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:54,088][626795] Updated weights for policy 0, policy_version 205972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:55,830][626795] Updated weights for policy 0, policy_version 205982 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:57,571][626795] Updated weights for policy 0, policy_version 205992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:58,975][24592] Fps is (10 sec: 48332.8, 60 sec: 45192.6, 300 sec: 43376.0). Total num frames: 1687552000. Throughput: 0: 11062.5. Samples: 171861726. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:47:58,976][24592] Avg episode reward: [(0, '4.980')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:47:59,311][626795] Updated weights for policy 0, policy_version 206002 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:04,166][24592] Fps is (10 sec: 32155.8, 60 sec: 42599.6, 300 sec: 42876.2). Total num frames: 1687642112. Throughput: 0: 10193.1. Samples: 171897066. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:04,168][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:04,367][626795] Updated weights for policy 0, policy_version 206012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:06,119][626795] Updated weights for policy 0, policy_version 206022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:07,837][626795] Updated weights for policy 0, policy_version 206032 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:08,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42461.9, 300 sec: 42903.9). Total num frames: 1687871488. Throughput: 0: 10137.2. Samples: 171964212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:08,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:09,542][626795] Updated weights for policy 0, policy_version 206042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:11,316][626795] Updated weights for policy 0, policy_version 206052 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:12,892][626795] Updated weights for policy 0, policy_version 206062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:13,975][24592] Fps is (10 sec: 46766.7, 60 sec: 42325.4, 300 sec: 43292.6). Total num frames: 1688100864. Throughput: 0: 10487.8. Samples: 171999516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:13,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:14,689][626795] Updated weights for policy 0, policy_version 206072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:16,386][626795] Updated weights for policy 0, policy_version 206082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:18,122][626795] Updated weights for policy 0, policy_version 206092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:18,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42461.8, 300 sec: 43348.2). Total num frames: 1688346624. Throughput: 0: 11062.3. Samples: 172071648. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:18,976][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:19,800][626795] Updated weights for policy 0, policy_version 206102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:21,535][626795] Updated weights for policy 0, policy_version 206112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:23,268][626795] Updated weights for policy 0, policy_version 206122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:23,975][24592] Fps is (10 sec: 48332.2, 60 sec: 42462.0, 300 sec: 43320.4). Total num frames: 1688584192. Throughput: 0: 11046.9. Samples: 172143234. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:23,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:24,962][626795] Updated weights for policy 0, policy_version 206132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:26,714][626795] Updated weights for policy 0, policy_version 206142 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:28,305][626795] Updated weights for policy 0, policy_version 206152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:28,976][24592] Fps is (10 sec: 47510.6, 60 sec: 45055.5, 300 sec: 43320.3). Total num frames: 1688821760. Throughput: 0: 11063.4. Samples: 172179480. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:28,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:30,143][626795] Updated weights for policy 0, policy_version 206162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:31,730][626795] Updated weights for policy 0, policy_version 206172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:33,401][626795] Updated weights for policy 0, policy_version 206182 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:33,975][24592] Fps is (10 sec: 47514.1, 60 sec: 45193.2, 300 sec: 43320.5). Total num frames: 1689059328. Throughput: 0: 11064.5. Samples: 172251768. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:33,978][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:35,165][626795] Updated weights for policy 0, policy_version 206192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:39,734][24592] Fps is (10 sec: 33505.3, 60 sec: 42605.9, 300 sec: 42821.5). Total num frames: 1689182208. Throughput: 0: 10093.1. Samples: 172287612. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:39,736][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:40,231][626795] Updated weights for policy 0, policy_version 206202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:41,997][626795] Updated weights for policy 0, policy_version 206212 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:43,710][626795] Updated weights for policy 0, policy_version 206222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:43,975][24592] Fps is (10 sec: 32767.9, 60 sec: 42598.5, 300 sec: 42793.0). Total num frames: 1689387008. Throughput: 0: 10158.8. Samples: 172318872. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:43,977][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:45,442][626795] Updated weights for policy 0, policy_version 206232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:47,110][626795] Updated weights for policy 0, policy_version 206242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:48,795][626795] Updated weights for policy 0, policy_version 206252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:48,975][24592] Fps is (10 sec: 46981.6, 60 sec: 42461.8, 300 sec: 43308.5). Total num frames: 1689616384. Throughput: 0: 11017.8. Samples: 172390770. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:48,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:50,528][626795] Updated weights for policy 0, policy_version 206262 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:52,193][626795] Updated weights for policy 0, policy_version 206272 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:53,939][626795] Updated weights for policy 0, policy_version 206282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:53,975][24592] Fps is (10 sec: 47513.3, 60 sec: 42462.0, 300 sec: 43348.2). Total num frames: 1689862144. Throughput: 0: 11082.4. Samples: 172462920. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:53,976][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:55,636][626795] Updated weights for policy 0, policy_version 206292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:57,292][626795] Updated weights for policy 0, policy_version 206302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:58,976][24592] Fps is (10 sec: 49149.7, 60 sec: 42598.0, 300 sec: 43348.1). Total num frames: 1690107904. Throughput: 0: 11098.0. Samples: 172498932. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:48:58,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:48:58,980][626795] Updated weights for policy 0, policy_version 206312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:00,740][626795] Updated weights for policy 0, policy_version 206322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:02,459][626795] Updated weights for policy 0, policy_version 206332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:03,975][24592] Fps is (10 sec: 48332.6, 60 sec: 45199.5, 300 sec: 43348.2). Total num frames: 1690345472. Throughput: 0: 11104.4. Samples: 172571346. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:03,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000206341_1690345472.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:04,029][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000205075_1679974400.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:04,070][626795] Updated weights for policy 0, policy_version 206342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:05,925][626795] Updated weights for policy 0, policy_version 206352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:07,570][626795] Updated weights for policy 0, policy_version 206362 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:08,975][24592] Fps is (10 sec: 47516.0, 60 sec: 45192.5, 300 sec: 43348.2). Total num frames: 1690583040. Throughput: 0: 11103.4. Samples: 172642884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:08,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:09,304][626795] Updated weights for policy 0, policy_version 206372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:11,118][626795] Updated weights for policy 0, policy_version 206382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:15,300][24592] Fps is (10 sec: 33275.7, 60 sec: 42746.9, 300 sec: 42795.0). Total num frames: 1690722304. Throughput: 0: 10761.5. Samples: 172677996. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:15,301][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:16,090][626795] Updated weights for policy 0, policy_version 206392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:17,826][626795] Updated weights for policy 0, policy_version 206402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:18,980][24592] Fps is (10 sec: 31114.4, 60 sec: 42458.5, 300 sec: 42764.3). Total num frames: 1690894336. Throughput: 0: 10177.8. Samples: 172709820. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:18,981][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:19,606][626795] Updated weights for policy 0, policy_version 206412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:21,271][626795] Updated weights for policy 0, policy_version 206422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:22,956][626795] Updated weights for policy 0, policy_version 206432 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:23,975][24592] Fps is (10 sec: 48157.6, 60 sec: 42598.3, 300 sec: 43346.6). Total num frames: 1691140096. Throughput: 0: 11164.2. Samples: 172781532. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:23,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:24,639][626795] Updated weights for policy 0, policy_version 206442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:26,331][626795] Updated weights for policy 0, policy_version 206452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:28,149][626795] Updated weights for policy 0, policy_version 206462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:28,975][24592] Fps is (10 sec: 48356.5, 60 sec: 42598.9, 300 sec: 43348.2). Total num frames: 1691377664. Throughput: 0: 11071.9. Samples: 172817106. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:28,976][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:29,797][626795] Updated weights for policy 0, policy_version 206472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:31,534][626795] Updated weights for policy 0, policy_version 206482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:33,184][626795] Updated weights for policy 0, policy_version 206492 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:33,975][24592] Fps is (10 sec: 47514.4, 60 sec: 42598.4, 300 sec: 43348.2). Total num frames: 1691615232. Throughput: 0: 11090.4. Samples: 172889838. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:33,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:34,882][626795] Updated weights for policy 0, policy_version 206502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:36,618][626795] Updated weights for policy 0, policy_version 206512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:38,268][626795] Updated weights for policy 0, policy_version 206522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:38,976][24592] Fps is (10 sec: 48331.0, 60 sec: 45217.9, 300 sec: 43375.9). Total num frames: 1691860992. Throughput: 0: 11092.5. Samples: 172962084. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:38,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:40,001][626795] Updated weights for policy 0, policy_version 206532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:41,714][626795] Updated weights for policy 0, policy_version 206542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:43,495][626795] Updated weights for policy 0, policy_version 206552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:43,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45056.0, 300 sec: 43320.5). Total num frames: 1692090368. Throughput: 0: 11088.3. Samples: 172997898. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:43,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:45,217][626795] Updated weights for policy 0, policy_version 206562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:46,880][626795] Updated weights for policy 0, policy_version 206572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:50,870][24592] Fps is (10 sec: 34438.0, 60 sec: 42883.0, 300 sec: 42878.5). Total num frames: 1692270592. Throughput: 0: 9857.9. Samples: 173033622. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:50,870][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:51,998][626795] Updated weights for policy 0, policy_version 206582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:53,636][626795] Updated weights for policy 0, policy_version 206592 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:53,976][24592] Fps is (10 sec: 31948.1, 60 sec: 42461.8, 300 sec: 42792.8). Total num frames: 1692409856. Throughput: 0: 10174.8. Samples: 173100750. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:53,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:55,395][626795] Updated weights for policy 0, policy_version 206602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:57,155][626795] Updated weights for policy 0, policy_version 206612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:49:58,928][626795] Updated weights for policy 0, policy_version 206622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:58,975][24592] Fps is (10 sec: 46488.9, 60 sec: 42325.7, 300 sec: 43354.1). Total num frames: 1692647424. Throughput: 0: 10486.8. Samples: 173136012. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:49:58,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:00,459][626795] Updated weights for policy 0, policy_version 206632 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:02,253][626795] Updated weights for policy 0, policy_version 206642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:03,976][24592] Fps is (10 sec: 47513.5, 60 sec: 42325.2, 300 sec: 43403.7). Total num frames: 1692884992. Throughput: 0: 11079.3. Samples: 173208336. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:03,978][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:04,014][626795] Updated weights for policy 0, policy_version 206652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:05,637][626795] Updated weights for policy 0, policy_version 206662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:07,345][626795] Updated weights for policy 0, policy_version 206672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:08,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42461.9, 300 sec: 43431.5). Total num frames: 1693130752. Throughput: 0: 11081.9. Samples: 173280216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:08,978][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:09,144][626795] Updated weights for policy 0, policy_version 206682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:10,777][626795] Updated weights for policy 0, policy_version 206692 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:12,457][626795] Updated weights for policy 0, policy_version 206702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:13,976][24592] Fps is (10 sec: 48333.4, 60 sec: 45095.8, 300 sec: 43431.5). Total num frames: 1693368320. Throughput: 0: 11096.6. Samples: 173316456. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:13,978][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:14,246][626795] Updated weights for policy 0, policy_version 206712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:15,947][626795] Updated weights for policy 0, policy_version 206722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:17,591][626795] Updated weights for policy 0, policy_version 206732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:18,976][24592] Fps is (10 sec: 47513.2, 60 sec: 45196.2, 300 sec: 43431.5). Total num frames: 1693605888. Throughput: 0: 11076.6. Samples: 173388288. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:18,978][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:19,362][626795] Updated weights for policy 0, policy_version 206742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:21,108][626795] Updated weights for policy 0, policy_version 206752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:22,755][626795] Updated weights for policy 0, policy_version 206762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:26,453][24592] Fps is (10 sec: 34797.9, 60 sec: 42614.1, 300 sec: 42904.7). Total num frames: 1693802496. Throughput: 0: 9727.6. Samples: 173423922. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:26,453][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:27,899][626795] Updated weights for policy 0, policy_version 206772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:28,976][24592] Fps is (10 sec: 31947.5, 60 sec: 42461.5, 300 sec: 42876.1). Total num frames: 1693925376. Throughput: 0: 10158.7. Samples: 173455044. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:28,977][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:29,563][626795] Updated weights for policy 0, policy_version 206782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:31,344][626795] Updated weights for policy 0, policy_version 206792 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:32,998][626795] Updated weights for policy 0, policy_version 206802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:33,975][24592] Fps is (10 sec: 47913.7, 60 sec: 42461.8, 300 sec: 43365.5). Total num frames: 1694162944. Throughput: 0: 11444.9. Samples: 173526966. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:33,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:34,781][626795] Updated weights for policy 0, policy_version 206812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:36,369][626795] Updated weights for policy 0, policy_version 206822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:38,112][626795] Updated weights for policy 0, policy_version 206832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:38,976][24592] Fps is (10 sec: 48334.1, 60 sec: 42462.0, 300 sec: 43431.9). Total num frames: 1694408704. Throughput: 0: 11074.8. Samples: 173599116. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:38,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:39,801][626795] Updated weights for policy 0, policy_version 206842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:41,467][626795] Updated weights for policy 0, policy_version 206852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:43,204][626795] Updated weights for policy 0, policy_version 206862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.4, 300 sec: 43431.5). Total num frames: 1694646272. Throughput: 0: 11085.7. Samples: 173634870. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:43,977][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:44,941][626795] Updated weights for policy 0, policy_version 206872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:46,586][626795] Updated weights for policy 0, policy_version 206882 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:48,387][626795] Updated weights for policy 0, policy_version 206892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:48,975][24592] Fps is (10 sec: 47514.5, 60 sec: 44973.9, 300 sec: 43431.5). Total num frames: 1694883840. Throughput: 0: 11087.5. Samples: 173707272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:48,976][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:50,151][626795] Updated weights for policy 0, policy_version 206902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:51,786][626795] Updated weights for policy 0, policy_version 206912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:53,534][626795] Updated weights for policy 0, policy_version 206922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:53,975][24592] Fps is (10 sec: 47513.3, 60 sec: 45192.6, 300 sec: 43431.5). Total num frames: 1695121408. Throughput: 0: 11066.2. Samples: 173778198. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:50:53,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:55,252][626795] Updated weights for policy 0, policy_version 206932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:56,922][626795] Updated weights for policy 0, policy_version 206942 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:50:58,698][626795] Updated weights for policy 0, policy_version 206952 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:02,028][24592] Fps is (10 sec: 35774.8, 60 sec: 42874.9, 300 sec: 42959.2). Total num frames: 1695350784. Throughput: 0: 10356.5. Samples: 173814108. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:02,029][24592] Avg episode reward: [(0, '5.073')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:03,795][626795] Updated weights for policy 0, policy_version 206962 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:03,975][24592] Fps is (10 sec: 31949.1, 60 sec: 42598.6, 300 sec: 42903.9). Total num frames: 1695440896. Throughput: 0: 10160.1. Samples: 173845494. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:03,976][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000206963_1695440896.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000205699_1685086208.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:05,600][626795] Updated weights for policy 0, policy_version 206972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:07,191][626795] Updated weights for policy 0, policy_version 206982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:08,942][626795] Updated weights for policy 0, policy_version 206992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:08,976][24592] Fps is (10 sec: 47162.7, 60 sec: 42461.7, 300 sec: 42903.8). Total num frames: 1695678464. Throughput: 0: 11598.6. Samples: 173917128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:08,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:10,509][626795] Updated weights for policy 0, policy_version 207002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:12,227][626795] Updated weights for policy 0, policy_version 207012 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:13,976][24592] Fps is (10 sec: 47512.3, 60 sec: 42461.8, 300 sec: 43459.3). Total num frames: 1695916032. Throughput: 0: 11067.4. Samples: 173953074. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:13,976][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:13,983][626795] Updated weights for policy 0, policy_version 207022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:15,724][626795] Updated weights for policy 0, policy_version 207032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:17,369][626795] Updated weights for policy 0, policy_version 207042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:18,975][24592] Fps is (10 sec: 48333.7, 60 sec: 42598.4, 300 sec: 43487.0). Total num frames: 1696161792. Throughput: 0: 11078.9. Samples: 174025518. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:18,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:19,138][626795] Updated weights for policy 0, policy_version 207052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:20,806][626795] Updated weights for policy 0, policy_version 207062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:22,563][626795] Updated weights for policy 0, policy_version 207072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:23,975][24592] Fps is (10 sec: 48333.7, 60 sec: 45144.9, 300 sec: 43487.0). Total num frames: 1696399360. Throughput: 0: 11082.7. Samples: 174097836. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:23,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:24,184][626795] Updated weights for policy 0, policy_version 207082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:25,915][626795] Updated weights for policy 0, policy_version 207092 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:27,646][626795] Updated weights for policy 0, policy_version 207102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:28,976][24592] Fps is (10 sec: 47512.8, 60 sec: 45192.8, 300 sec: 43487.2). Total num frames: 1696636928. Throughput: 0: 11080.8. Samples: 174133506. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:28,978][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:29,327][626795] Updated weights for policy 0, policy_version 207112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:31,070][626795] Updated weights for policy 0, policy_version 207122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:32,795][626795] Updated weights for policy 0, policy_version 207132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:33,975][24592] Fps is (10 sec: 48333.1, 60 sec: 45329.1, 300 sec: 43514.8). Total num frames: 1696882688. Throughput: 0: 11082.7. Samples: 174205992. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:33,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:37,773][626795] Updated weights for policy 0, policy_version 207142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:38,975][24592] Fps is (10 sec: 31949.1, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1696956416. Throughput: 0: 10208.5. Samples: 174237582. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:38,979][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:39,501][626795] Updated weights for policy 0, policy_version 207152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:41,349][626795] Updated weights for policy 0, policy_version 207162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:42,952][626795] Updated weights for policy 0, policy_version 207172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:43,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42598.4, 300 sec: 42959.5). Total num frames: 1697202176. Throughput: 0: 10934.9. Samples: 174272802. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:43,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:44,628][626795] Updated weights for policy 0, policy_version 207182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:46,373][626795] Updated weights for policy 0, policy_version 207192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:48,096][626795] Updated weights for policy 0, policy_version 207202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:48,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42598.3, 300 sec: 43514.8). Total num frames: 1697439744. Throughput: 0: 11105.8. Samples: 174345258. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:48,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:49,720][626795] Updated weights for policy 0, policy_version 207212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:51,480][626795] Updated weights for policy 0, policy_version 207222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:53,218][626795] Updated weights for policy 0, policy_version 207232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:53,976][24592] Fps is (10 sec: 48331.8, 60 sec: 42734.8, 300 sec: 43542.5). Total num frames: 1697685504. Throughput: 0: 11121.1. Samples: 174417576. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:53,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:54,932][626795] Updated weights for policy 0, policy_version 207242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:56,587][626795] Updated weights for policy 0, policy_version 207252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:58,252][626795] Updated weights for policy 0, policy_version 207262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:58,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45169.3, 300 sec: 43542.6). Total num frames: 1697923072. Throughput: 0: 11115.5. Samples: 174453270. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:51:58,978][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:51:59,988][626795] Updated weights for policy 0, policy_version 207272 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:01,703][626795] Updated weights for policy 0, policy_version 207282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:03,297][626795] Updated weights for policy 0, policy_version 207292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:03,975][24592] Fps is (10 sec: 48333.7, 60 sec: 45465.5, 300 sec: 43542.6). Total num frames: 1698168832. Throughput: 0: 11132.9. Samples: 174526500. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:03,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:05,016][626795] Updated weights for policy 0, policy_version 207302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:06,749][626795] Updated weights for policy 0, policy_version 207312 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:08,468][626795] Updated weights for policy 0, policy_version 207322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:08,975][24592] Fps is (10 sec: 48333.2, 60 sec: 45465.8, 300 sec: 43542.6). Total num frames: 1698406400. Throughput: 0: 11129.4. Samples: 174598656. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:08,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:13,464][626795] Updated weights for policy 0, policy_version 207332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:13,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42735.1, 300 sec: 42987.2). Total num frames: 1698480128. Throughput: 0: 10668.3. Samples: 174613578. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:13,977][24592] Avg episode reward: [(0, '4.947')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:15,257][626795] Updated weights for policy 0, policy_version 207342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:16,955][626795] Updated weights for policy 0, policy_version 207352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:18,516][626795] Updated weights for policy 0, policy_version 207362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:18,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42735.0, 300 sec: 43015.0). Total num frames: 1698725888. Throughput: 0: 10240.3. Samples: 174666804. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:18,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:20,307][626795] Updated weights for policy 0, policy_version 207372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:22,007][626795] Updated weights for policy 0, policy_version 207382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:23,714][626795] Updated weights for policy 0, policy_version 207392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:23,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42735.0, 300 sec: 43542.6). Total num frames: 1698963456. Throughput: 0: 11145.1. Samples: 174739110. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:23,976][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:25,448][626795] Updated weights for policy 0, policy_version 207402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:27,130][626795] Updated weights for policy 0, policy_version 207412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:28,733][626795] Updated weights for policy 0, policy_version 207422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:28,976][24592] Fps is (10 sec: 48330.3, 60 sec: 42871.2, 300 sec: 43598.1). Total num frames: 1699209216. Throughput: 0: 11161.5. Samples: 174775074. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:28,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:30,538][626795] Updated weights for policy 0, policy_version 207432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:32,233][626795] Updated weights for policy 0, policy_version 207442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:33,979][24592] Fps is (10 sec: 47494.9, 60 sec: 42595.6, 300 sec: 43542.0). Total num frames: 1699438592. Throughput: 0: 11150.6. Samples: 174847080. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:33,983][24592] Avg episode reward: [(0, '5.153')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:34,098][626795] Updated weights for policy 0, policy_version 207452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:35,680][626795] Updated weights for policy 0, policy_version 207462 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:37,370][626795] Updated weights for policy 0, policy_version 207472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:38,975][24592] Fps is (10 sec: 47516.0, 60 sec: 45465.7, 300 sec: 43570.3). Total num frames: 1699684352. Throughput: 0: 11139.4. Samples: 174918846. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:38,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:39,077][626795] Updated weights for policy 0, policy_version 207482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:40,778][626795] Updated weights for policy 0, policy_version 207492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:42,455][626795] Updated weights for policy 0, policy_version 207502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:43,975][24592] Fps is (10 sec: 49171.5, 60 sec: 45465.6, 300 sec: 43598.1). Total num frames: 1699930112. Throughput: 0: 11143.1. Samples: 174954708. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:43,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:44,229][626795] Updated weights for policy 0, policy_version 207512 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:44,948][626772] Signal inference workers to stop experience collection... (2400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:44,955][626772] Signal inference workers to resume experience collection... (2400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:44,968][626795] InferenceWorker_p0-w0: stopping experience collection (2400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:44,968][626795] InferenceWorker_p0-w0: resuming experience collection (2400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:48,976][24592] Fps is (10 sec: 31128.6, 60 sec: 42598.2, 300 sec: 42987.2). Total num frames: 1699995648. Throughput: 0: 10530.6. Samples: 175000380. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:48,978][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:49,297][626795] Updated weights for policy 0, policy_version 207522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:51,026][626795] Updated weights for policy 0, policy_version 207532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:52,734][626795] Updated weights for policy 0, policy_version 207542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:53,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42598.6, 300 sec: 43014.9). Total num frames: 1700241408. Throughput: 0: 10214.1. Samples: 175058292. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:53,976][24592] Avg episode reward: [(0, '4.347')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:54,397][626795] Updated weights for policy 0, policy_version 207552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:56,070][626795] Updated weights for policy 0, policy_version 207562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:57,878][626795] Updated weights for policy 0, policy_version 207572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:58,976][24592] Fps is (10 sec: 48332.5, 60 sec: 42598.1, 300 sec: 43542.9). Total num frames: 1700478976. Throughput: 0: 10671.5. Samples: 175093800. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:52:58,976][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:52:59,490][626795] Updated weights for policy 0, policy_version 207582 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:01,252][626795] Updated weights for policy 0, policy_version 207592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:02,949][626795] Updated weights for policy 0, policy_version 207602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:03,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.4, 300 sec: 43570.3). Total num frames: 1700724736. Throughput: 0: 11091.1. Samples: 175165902. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:03,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000207608_1700724736.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:04,066][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000206341_1690345472.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:04,827][626795] Updated weights for policy 0, policy_version 207612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:06,366][626795] Updated weights for policy 0, policy_version 207622 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:08,147][626795] Updated weights for policy 0, policy_version 207632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:08,975][24592] Fps is (10 sec: 48334.5, 60 sec: 42598.3, 300 sec: 43598.1). Total num frames: 1700962304. Throughput: 0: 11073.2. Samples: 175237404. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:08,976][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:09,890][626795] Updated weights for policy 0, policy_version 207642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:11,586][626795] Updated weights for policy 0, policy_version 207652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:13,214][626795] Updated weights for policy 0, policy_version 207662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:13,975][24592] Fps is (10 sec: 47513.9, 60 sec: 45329.2, 300 sec: 43570.3). Total num frames: 1701199872. Throughput: 0: 11068.9. Samples: 175273170. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:13,976][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:14,983][626795] Updated weights for policy 0, policy_version 207672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:16,716][626795] Updated weights for policy 0, policy_version 207682 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:18,417][626795] Updated weights for policy 0, policy_version 207692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:18,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45192.5, 300 sec: 43570.3). Total num frames: 1701437440. Throughput: 0: 11063.0. Samples: 175344870. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:18,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:20,111][626795] Updated weights for policy 0, policy_version 207702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:24,336][24592] Fps is (10 sec: 32417.1, 60 sec: 42615.2, 300 sec: 43045.7). Total num frames: 1701535744. Throughput: 0: 10188.4. Samples: 175381002. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:24,337][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:25,144][626795] Updated weights for policy 0, policy_version 207712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:26,894][626795] Updated weights for policy 0, policy_version 207722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:28,574][626795] Updated weights for policy 0, policy_version 207732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:28,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42462.2, 300 sec: 43042.7). Total num frames: 1701756928. Throughput: 0: 10181.1. Samples: 175412856. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:28,978][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:30,306][626795] Updated weights for policy 0, policy_version 207742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:31,889][626795] Updated weights for policy 0, policy_version 207752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:33,715][626795] Updated weights for policy 0, policy_version 207762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:33,975][24592] Fps is (10 sec: 48442.6, 60 sec: 42737.7, 300 sec: 43571.3). Total num frames: 1702002688. Throughput: 0: 10760.7. Samples: 175484610. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:33,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:35,287][626795] Updated weights for policy 0, policy_version 207772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:37,065][626795] Updated weights for policy 0, policy_version 207782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:38,817][626795] Updated weights for policy 0, policy_version 207792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:38,976][24592] Fps is (10 sec: 48331.5, 60 sec: 42598.2, 300 sec: 43570.3). Total num frames: 1702240256. Throughput: 0: 11083.9. Samples: 175557072. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:38,978][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:40,451][626795] Updated weights for policy 0, policy_version 207802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:42,101][626795] Updated weights for policy 0, policy_version 207812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:43,856][626795] Updated weights for policy 0, policy_version 207822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:43,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42461.8, 300 sec: 43598.1). Total num frames: 1702477824. Throughput: 0: 11099.7. Samples: 175593282. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:43,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:45,498][626795] Updated weights for policy 0, policy_version 207832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:47,171][626795] Updated weights for policy 0, policy_version 207842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:48,915][626795] Updated weights for policy 0, policy_version 207852 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:48,975][24592] Fps is (10 sec: 48334.0, 60 sec: 45465.8, 300 sec: 43598.1). Total num frames: 1702723584. Throughput: 0: 11129.2. Samples: 175666716. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:48,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:50,638][626795] Updated weights for policy 0, policy_version 207862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:52,420][626795] Updated weights for policy 0, policy_version 207872 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:53,976][24592] Fps is (10 sec: 48332.2, 60 sec: 45328.9, 300 sec: 43570.4). Total num frames: 1702961152. Throughput: 0: 11101.3. Samples: 175736964. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:53,977][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:54,110][626795] Updated weights for policy 0, policy_version 207882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:53:55,886][626795] Updated weights for policy 0, policy_version 207892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:59,907][24592] Fps is (10 sec: 32973.5, 60 sec: 42754.1, 300 sec: 43045.7). Total num frames: 1703084032. Throughput: 0: 10097.5. Samples: 175736964. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:53:59,908][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:00,985][626795] Updated weights for policy 0, policy_version 207902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:02,762][626795] Updated weights for policy 0, policy_version 207912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:03,975][24592] Fps is (10 sec: 31130.2, 60 sec: 42461.9, 300 sec: 43014.9). Total num frames: 1703272448. Throughput: 0: 10193.5. Samples: 175803576. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:03,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:04,400][626795] Updated weights for policy 0, policy_version 207922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:06,087][626795] Updated weights for policy 0, policy_version 207932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:07,790][626795] Updated weights for policy 0, policy_version 207942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:08,975][24592] Fps is (10 sec: 47877.3, 60 sec: 42598.4, 300 sec: 43571.6). Total num frames: 1703518208. Throughput: 0: 11095.4. Samples: 175876290. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:08,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:09,553][626795] Updated weights for policy 0, policy_version 207952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:11,184][626795] Updated weights for policy 0, policy_version 207962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:12,878][626795] Updated weights for policy 0, policy_version 207972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:13,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42598.3, 300 sec: 43598.8). Total num frames: 1703755776. Throughput: 0: 11099.2. Samples: 175912320. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:13,976][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:14,589][626795] Updated weights for policy 0, policy_version 207982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:16,190][626795] Updated weights for policy 0, policy_version 207992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:17,940][626795] Updated weights for policy 0, policy_version 208002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:18,975][24592] Fps is (10 sec: 48332.2, 60 sec: 42734.9, 300 sec: 43598.1). Total num frames: 1704001536. Throughput: 0: 11133.3. Samples: 175985610. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:18,978][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:19,593][626795] Updated weights for policy 0, policy_version 208012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:21,302][626795] Updated weights for policy 0, policy_version 208022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:22,922][626795] Updated weights for policy 0, policy_version 208032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:23,976][24592] Fps is (10 sec: 48331.8, 60 sec: 45328.5, 300 sec: 43598.1). Total num frames: 1704239104. Throughput: 0: 11134.3. Samples: 176058114. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:23,976][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:24,721][626795] Updated weights for policy 0, policy_version 208042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:26,386][626795] Updated weights for policy 0, policy_version 208052 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:28,095][626795] Updated weights for policy 0, policy_version 208062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:28,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45465.6, 300 sec: 43625.9). Total num frames: 1704484864. Throughput: 0: 11138.5. Samples: 176094516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:28,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:29,844][626795] Updated weights for policy 0, policy_version 208072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:31,451][626795] Updated weights for policy 0, policy_version 208082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:35,520][24592] Fps is (10 sec: 34062.1, 60 sec: 42727.5, 300 sec: 43067.2). Total num frames: 1704632320. Throughput: 0: 9953.4. Samples: 176129988. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:35,521][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:36,527][626795] Updated weights for policy 0, policy_version 208092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:38,246][626795] Updated weights for policy 0, policy_version 208102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:38,975][24592] Fps is (10 sec: 31949.2, 60 sec: 42735.2, 300 sec: 43098.3). Total num frames: 1704804352. Throughput: 0: 10245.4. Samples: 176198004. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:38,976][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:40,027][626795] Updated weights for policy 0, policy_version 208112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:41,634][626795] Updated weights for policy 0, policy_version 208122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:43,410][626795] Updated weights for policy 0, policy_version 208132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:43,975][24592] Fps is (10 sec: 48440.9, 60 sec: 42735.0, 300 sec: 43572.4). Total num frames: 1705041920. Throughput: 0: 11278.4. Samples: 176233986. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:43,977][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:45,022][626795] Updated weights for policy 0, policy_version 208142 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:46,791][626795] Updated weights for policy 0, policy_version 208152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:48,377][626795] Updated weights for policy 0, policy_version 208162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:48,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42734.9, 300 sec: 43653.7). Total num frames: 1705287680. Throughput: 0: 11186.7. Samples: 176306976. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:48,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:50,147][626795] Updated weights for policy 0, policy_version 208172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:51,825][626795] Updated weights for policy 0, policy_version 208182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:53,608][626795] Updated weights for policy 0, policy_version 208192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:53,975][24592] Fps is (10 sec: 48332.3, 60 sec: 42735.0, 300 sec: 43653.6). Total num frames: 1705525248. Throughput: 0: 11174.1. Samples: 176379126. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:53,978][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:55,206][626795] Updated weights for policy 0, policy_version 208202 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:56,976][626795] Updated weights for policy 0, policy_version 208212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:54:58,727][626795] Updated weights for policy 0, policy_version 208222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:58,976][24592] Fps is (10 sec: 48332.0, 60 sec: 45489.0, 300 sec: 43681.4). Total num frames: 1705771008. Throughput: 0: 11177.0. Samples: 176415288. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:54:58,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:00,331][626795] Updated weights for policy 0, policy_version 208232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:02,010][626795] Updated weights for policy 0, policy_version 208242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:03,776][626795] Updated weights for policy 0, policy_version 208252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:03,975][24592] Fps is (10 sec: 48333.5, 60 sec: 45602.2, 300 sec: 43653.6). Total num frames: 1706008576. Throughput: 0: 11146.3. Samples: 176487192. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:03,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000208253_1706008576.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000206963_1695440896.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:05,581][626795] Updated weights for policy 0, policy_version 208262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:07,165][626795] Updated weights for policy 0, policy_version 208272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:11,129][24592] Fps is (10 sec: 33703.6, 60 sec: 42836.1, 300 sec: 43116.8). Total num frames: 1706180608. Throughput: 0: 9852.0. Samples: 176522664. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:11,129][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:12,312][626795] Updated weights for policy 0, policy_version 208282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:13,975][24592] Fps is (10 sec: 31129.4, 60 sec: 42734.9, 300 sec: 43098.3). Total num frames: 1706319872. Throughput: 0: 10216.8. Samples: 176554272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:13,976][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:13,985][626795] Updated weights for policy 0, policy_version 208292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:15,790][626795] Updated weights for policy 0, policy_version 208302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:17,482][626795] Updated weights for policy 0, policy_version 208312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:18,976][24592] Fps is (10 sec: 48021.8, 60 sec: 42598.3, 300 sec: 43603.2). Total num frames: 1706557440. Throughput: 0: 11402.8. Samples: 176625510. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:18,978][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:19,155][626795] Updated weights for policy 0, policy_version 208322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:20,875][626795] Updated weights for policy 0, policy_version 208332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:22,613][626795] Updated weights for policy 0, policy_version 208342 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:23,976][24592] Fps is (10 sec: 48328.2, 60 sec: 42734.4, 300 sec: 43653.6). Total num frames: 1706803200. Throughput: 0: 11118.1. Samples: 176698332. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:23,978][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:24,211][626795] Updated weights for policy 0, policy_version 208352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:25,981][626795] Updated weights for policy 0, policy_version 208362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:27,643][626795] Updated weights for policy 0, policy_version 208372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:28,975][24592] Fps is (10 sec: 48334.3, 60 sec: 42598.4, 300 sec: 43653.6). Total num frames: 1707040768. Throughput: 0: 11112.4. Samples: 176734044. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:28,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:29,390][626795] Updated weights for policy 0, policy_version 208382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:31,059][626795] Updated weights for policy 0, policy_version 208392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:32,773][626795] Updated weights for policy 0, policy_version 208402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:33,975][24592] Fps is (10 sec: 47518.4, 60 sec: 45265.4, 300 sec: 43625.9). Total num frames: 1707278336. Throughput: 0: 11105.5. Samples: 176806722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:33,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:34,487][626795] Updated weights for policy 0, policy_version 208412 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:36,158][626795] Updated weights for policy 0, policy_version 208422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:37,953][626795] Updated weights for policy 0, policy_version 208432 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:38,975][24592] Fps is (10 sec: 48333.1, 60 sec: 45329.1, 300 sec: 43653.7). Total num frames: 1707524096. Throughput: 0: 11106.7. Samples: 176878926. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:38,976][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:39,531][626795] Updated weights for policy 0, policy_version 208442 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:41,312][626795] Updated weights for policy 0, policy_version 208452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:42,994][626795] Updated weights for policy 0, policy_version 208462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:46,759][24592] Fps is (10 sec: 35885.0, 60 sec: 42927.7, 300 sec: 43163.0). Total num frames: 1707737088. Throughput: 0: 10453.5. Samples: 176914794. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:46,761][24592] Avg episode reward: [(0, '5.131')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:47,959][626795] Updated weights for policy 0, policy_version 208472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:48,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42461.9, 300 sec: 43098.3). Total num frames: 1707835392. Throughput: 0: 10195.6. Samples: 176945994. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:48,976][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:49,880][626795] Updated weights for policy 0, policy_version 208482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:51,718][626795] Updated weights for policy 0, policy_version 208492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:53,413][626795] Updated weights for policy 0, policy_version 208502 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:53,975][24592] Fps is (10 sec: 46544.8, 60 sec: 42461.9, 300 sec: 43576.9). Total num frames: 1708072960. Throughput: 0: 11495.7. Samples: 177015216. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:53,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:55,259][626795] Updated weights for policy 0, policy_version 208512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:57,047][626795] Updated weights for policy 0, policy_version 208522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:55:58,718][626795] Updated weights for policy 0, policy_version 208532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:58,975][24592] Fps is (10 sec: 46693.6, 60 sec: 42188.9, 300 sec: 43598.1). Total num frames: 1708302336. Throughput: 0: 10999.0. Samples: 177049230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:55:58,977][24592] Avg episode reward: [(0, '4.971')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:00,383][626795] Updated weights for policy 0, policy_version 208542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:02,079][626795] Updated weights for policy 0, policy_version 208552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:03,792][626795] Updated weights for policy 0, policy_version 208562 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:03,976][24592] Fps is (10 sec: 47510.5, 60 sec: 42324.8, 300 sec: 43625.8). Total num frames: 1708548096. Throughput: 0: 11028.6. Samples: 177121800. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:03,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:05,493][626795] Updated weights for policy 0, policy_version 208572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:07,119][626795] Updated weights for policy 0, policy_version 208582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:08,833][626795] Updated weights for policy 0, policy_version 208592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:08,976][24592] Fps is (10 sec: 48332.8, 60 sec: 45033.6, 300 sec: 43625.9). Total num frames: 1708785664. Throughput: 0: 11025.3. Samples: 177194460. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:08,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:10,547][626795] Updated weights for policy 0, policy_version 208602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:12,294][626795] Updated weights for policy 0, policy_version 208612 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:13,853][626795] Updated weights for policy 0, policy_version 208622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:13,975][24592] Fps is (10 sec: 48335.7, 60 sec: 45192.5, 300 sec: 43625.9). Total num frames: 1709031424. Throughput: 0: 11042.1. Samples: 177230940. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:13,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:15,620][626795] Updated weights for policy 0, policy_version 208632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:17,384][626795] Updated weights for policy 0, policy_version 208642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:18,976][24592] Fps is (10 sec: 48332.2, 60 sec: 45192.6, 300 sec: 43625.8). Total num frames: 1709268992. Throughput: 0: 11040.7. Samples: 177303558. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:18,977][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:22,279][626795] Updated weights for policy 0, policy_version 208652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:23,870][626795] Updated weights for policy 0, policy_version 208662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:23,977][24592] Fps is (10 sec: 32762.9, 60 sec: 42597.9, 300 sec: 43125.8). Total num frames: 1709359104. Throughput: 0: 10179.4. Samples: 177337014. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:23,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:25,760][626795] Updated weights for policy 0, policy_version 208672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:27,585][626795] Updated weights for policy 0, policy_version 208682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:28,975][24592] Fps is (10 sec: 32768.9, 60 sec: 42598.4, 300 sec: 43098.3). Total num frames: 1709596672. Throughput: 0: 10821.9. Samples: 177371652. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:28,977][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:29,139][626795] Updated weights for policy 0, policy_version 208692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:30,942][626795] Updated weights for policy 0, policy_version 208702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:32,633][626795] Updated weights for policy 0, policy_version 208712 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:33,976][24592] Fps is (10 sec: 46700.7, 60 sec: 42461.6, 300 sec: 43625.8). Total num frames: 1709826048. Throughput: 0: 11039.0. Samples: 177442752. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:33,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:34,365][626795] Updated weights for policy 0, policy_version 208722 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:36,065][626795] Updated weights for policy 0, policy_version 208732 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:37,780][626795] Updated weights for policy 0, policy_version 208742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:38,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42461.8, 300 sec: 43625.9). Total num frames: 1710071808. Throughput: 0: 11122.1. Samples: 177515712. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:38,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:39,335][626795] Updated weights for policy 0, policy_version 208752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:41,176][626795] Updated weights for policy 0, policy_version 208762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:42,857][626795] Updated weights for policy 0, policy_version 208772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:43,975][24592] Fps is (10 sec: 48334.0, 60 sec: 44957.4, 300 sec: 43625.9). Total num frames: 1710309376. Throughput: 0: 11158.6. Samples: 177551364. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:43,978][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:44,417][626795] Updated weights for policy 0, policy_version 208782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:46,209][626795] Updated weights for policy 0, policy_version 208792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:47,940][626795] Updated weights for policy 0, policy_version 208802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:48,975][24592] Fps is (10 sec: 48332.3, 60 sec: 45329.0, 300 sec: 43625.9). Total num frames: 1710555136. Throughput: 0: 11164.3. Samples: 177624186. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:48,977][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:49,602][626795] Updated weights for policy 0, policy_version 208812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:51,278][626795] Updated weights for policy 0, policy_version 208822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:52,909][626795] Updated weights for policy 0, policy_version 208832 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:53,975][24592] Fps is (10 sec: 48333.3, 60 sec: 45329.1, 300 sec: 43625.9). Total num frames: 1710792704. Throughput: 0: 11172.4. Samples: 177697218. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:53,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:58,019][626795] Updated weights for policy 0, policy_version 208842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:58,975][24592] Fps is (10 sec: 32768.2, 60 sec: 43008.1, 300 sec: 43098.3). Total num frames: 1710882816. Throughput: 0: 10591.9. Samples: 177707574. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:56:58,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:56:59,751][626795] Updated weights for policy 0, policy_version 208852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:01,542][626795] Updated weights for policy 0, policy_version 208862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:03,330][626795] Updated weights for policy 0, policy_version 208872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:03,976][24592] Fps is (10 sec: 31128.2, 60 sec: 42598.6, 300 sec: 43042.6). Total num frames: 1711104000. Throughput: 0: 10203.4. Samples: 177762714. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:03,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000208875_1711104000.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:04,050][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000207608_1700724736.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:05,162][626795] Updated weights for policy 0, policy_version 208882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:06,909][626795] Updated weights for policy 0, policy_version 208892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:08,572][626795] Updated weights for policy 0, policy_version 208902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:08,976][24592] Fps is (10 sec: 45874.1, 60 sec: 42598.3, 300 sec: 43598.1). Total num frames: 1711341568. Throughput: 0: 11026.6. Samples: 177833196. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:08,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:10,309][626795] Updated weights for policy 0, policy_version 208912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:11,985][626795] Updated weights for policy 0, policy_version 208922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:13,591][626795] Updated weights for policy 0, policy_version 208932 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:13,975][24592] Fps is (10 sec: 47515.1, 60 sec: 42461.9, 300 sec: 43570.3). Total num frames: 1711579136. Throughput: 0: 11055.4. Samples: 177869148. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:13,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:15,370][626795] Updated weights for policy 0, policy_version 208942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:16,954][626795] Updated weights for policy 0, policy_version 208952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:18,729][626795] Updated weights for policy 0, policy_version 208962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:18,975][24592] Fps is (10 sec: 48334.0, 60 sec: 42598.6, 300 sec: 43598.1). Total num frames: 1711824896. Throughput: 0: 11086.3. Samples: 177941634. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:18,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:20,504][626795] Updated weights for policy 0, policy_version 208972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:22,010][626795] Updated weights for policy 0, policy_version 208982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:23,711][626795] Updated weights for policy 0, policy_version 208992 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:23,976][24592] Fps is (10 sec: 49149.6, 60 sec: 45193.3, 300 sec: 43598.1). Total num frames: 1712070656. Throughput: 0: 11093.6. Samples: 178014930. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:23,978][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:25,518][626795] Updated weights for policy 0, policy_version 209002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:27,145][626795] Updated weights for policy 0, policy_version 209012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:28,882][626795] Updated weights for policy 0, policy_version 209022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:28,976][24592] Fps is (10 sec: 48332.1, 60 sec: 45192.4, 300 sec: 43626.4). Total num frames: 1712308224. Throughput: 0: 11094.8. Samples: 178050630. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:28,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:33,747][626795] Updated weights for policy 0, policy_version 209032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:33,975][24592] Fps is (10 sec: 32769.5, 60 sec: 42871.6, 300 sec: 43098.2). Total num frames: 1712398336. Throughput: 0: 10400.5. Samples: 178092210. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:33,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:35,547][626795] Updated weights for policy 0, policy_version 209042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:37,464][626795] Updated weights for policy 0, policy_version 209052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:38,975][24592] Fps is (10 sec: 31129.9, 60 sec: 42461.8, 300 sec: 43014.9). Total num frames: 1712619520. Throughput: 0: 10140.2. Samples: 178153530. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:38,978][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:39,207][626795] Updated weights for policy 0, policy_version 209062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:40,906][626795] Updated weights for policy 0, policy_version 209072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:42,598][626795] Updated weights for policy 0, policy_version 209082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:43,975][24592] Fps is (10 sec: 46694.9, 60 sec: 42598.4, 300 sec: 43625.9). Total num frames: 1712865280. Throughput: 0: 10695.2. Samples: 178188858. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:43,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:44,420][626795] Updated weights for policy 0, policy_version 209092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:45,878][626795] Updated weights for policy 0, policy_version 209102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:47,784][626795] Updated weights for policy 0, policy_version 209112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:48,976][24592] Fps is (10 sec: 48330.7, 60 sec: 42461.6, 300 sec: 43598.0). Total num frames: 1713102848. Throughput: 0: 11083.7. Samples: 178261482. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:48,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:49,441][626795] Updated weights for policy 0, policy_version 209122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:51,136][626795] Updated weights for policy 0, policy_version 209132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:52,773][626795] Updated weights for policy 0, policy_version 209142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:53,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42598.4, 300 sec: 43625.9). Total num frames: 1713348608. Throughput: 0: 11116.2. Samples: 178333422. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:53,979][24592] Avg episode reward: [(0, '4.277')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:54,507][626795] Updated weights for policy 0, policy_version 209152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:56,263][626795] Updated weights for policy 0, policy_version 209162 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:57,824][626795] Updated weights for policy 0, policy_version 209172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:58,975][24592] Fps is (10 sec: 48335.4, 60 sec: 45056.1, 300 sec: 43598.1). Total num frames: 1713586176. Throughput: 0: 11124.2. Samples: 178369734. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:57:58,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:57:59,577][626795] Updated weights for policy 0, policy_version 209182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:01,272][626795] Updated weights for policy 0, policy_version 209192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:03,070][626795] Updated weights for policy 0, policy_version 209202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:03,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45329.3, 300 sec: 43598.1). Total num frames: 1713823744. Throughput: 0: 11134.0. Samples: 178442664. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:03,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:04,619][626795] Updated weights for policy 0, policy_version 209212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,928][626772] Signal inference workers to stop experience collection... (2450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,930][626772] Signal inference workers to resume experience collection... (2450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,937][626795] InferenceWorker_p0-w0: stopping experience collection (2450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,945][626795] InferenceWorker_p0-w0: resuming experience collection (2450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,979][24592] Fps is (10 sec: 33573.6, 60 sec: 43005.3, 300 sec: 43125.4). Total num frames: 1713922048. Throughput: 0: 10269.6. Samples: 178477098. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:08,980][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:09,625][626795] Updated weights for policy 0, policy_version 209222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:11,566][626795] Updated weights for policy 0, policy_version 209232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:13,204][626795] Updated weights for policy 0, policy_version 209242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:13,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42598.4, 300 sec: 43042.7). Total num frames: 1714135040. Throughput: 0: 10211.4. Samples: 178510140. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:13,977][24592] Avg episode reward: [(0, '4.923')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:15,026][626795] Updated weights for policy 0, policy_version 209252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:16,778][626795] Updated weights for policy 0, policy_version 209262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:18,417][626795] Updated weights for policy 0, policy_version 209272 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:18,975][24592] Fps is (10 sec: 45893.4, 60 sec: 42598.4, 300 sec: 43595.9). Total num frames: 1714380800. Throughput: 0: 10854.4. Samples: 178580658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:18,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:20,082][626795] Updated weights for policy 0, policy_version 209282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:21,746][626795] Updated weights for policy 0, policy_version 209292 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:23,472][626795] Updated weights for policy 0, policy_version 209302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:23,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42462.3, 300 sec: 43598.1). Total num frames: 1714618368. Throughput: 0: 11114.6. Samples: 178653684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:23,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:25,164][626795] Updated weights for policy 0, policy_version 209312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:26,829][626795] Updated weights for policy 0, policy_version 209322 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:28,614][626795] Updated weights for policy 0, policy_version 209332 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:28,976][24592] Fps is (10 sec: 48331.8, 60 sec: 42598.3, 300 sec: 43598.1). Total num frames: 1714864128. Throughput: 0: 11120.5. Samples: 178689282. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:28,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:30,216][626795] Updated weights for policy 0, policy_version 209342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:31,914][626795] Updated weights for policy 0, policy_version 209352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:33,602][626795] Updated weights for policy 0, policy_version 209362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:33,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45056.1, 300 sec: 43598.1). Total num frames: 1715101696. Throughput: 0: 11135.6. Samples: 178762578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:33,976][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:35,338][626795] Updated weights for policy 0, policy_version 209372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:36,995][626795] Updated weights for policy 0, policy_version 209382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:38,670][626795] Updated weights for policy 0, policy_version 209392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:38,975][24592] Fps is (10 sec: 48334.0, 60 sec: 45465.7, 300 sec: 43625.9). Total num frames: 1715347456. Throughput: 0: 11143.6. Samples: 178834884. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:38,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:40,410][626795] Updated weights for policy 0, policy_version 209402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:44,071][24592] Fps is (10 sec: 33270.3, 60 sec: 42803.5, 300 sec: 43084.3). Total num frames: 1715437568. Throughput: 0: 10314.8. Samples: 178834884. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:44,072][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:45,406][626795] Updated weights for policy 0, policy_version 209412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:47,057][626795] Updated weights for policy 0, policy_version 209422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:48,968][626795] Updated weights for policy 0, policy_version 209432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:48,975][24592] Fps is (10 sec: 31948.9, 60 sec: 42735.3, 300 sec: 43070.5). Total num frames: 1715666944. Throughput: 0: 10220.0. Samples: 178902564. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:48,977][24592] Avg episode reward: [(0, '4.968')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:50,819][626795] Updated weights for policy 0, policy_version 209442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:52,515][626795] Updated weights for policy 0, policy_version 209452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:53,975][24592] Fps is (10 sec: 46316.6, 60 sec: 42461.9, 300 sec: 43569.1). Total num frames: 1715896320. Throughput: 0: 10991.4. Samples: 178971666. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:53,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:54,265][626795] Updated weights for policy 0, policy_version 209462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:56,097][626795] Updated weights for policy 0, policy_version 209472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:57,776][626795] Updated weights for policy 0, policy_version 209482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:58,976][24592] Fps is (10 sec: 46691.0, 60 sec: 42461.3, 300 sec: 43598.0). Total num frames: 1716133888. Throughput: 0: 11028.2. Samples: 179006418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:58:58,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:58:59,456][626795] Updated weights for policy 0, policy_version 209492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:01,206][626795] Updated weights for policy 0, policy_version 209502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:02,963][626795] Updated weights for policy 0, policy_version 209512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:03,976][24592] Fps is (10 sec: 47510.3, 60 sec: 42461.4, 300 sec: 43570.2). Total num frames: 1716371456. Throughput: 0: 11046.1. Samples: 179077740. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:03,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000209518_1716371456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000208253_1706008576.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:04,634][626795] Updated weights for policy 0, policy_version 209522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:06,334][626795] Updated weights for policy 0, policy_version 209532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:07,977][626795] Updated weights for policy 0, policy_version 209542 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:08,988][24592] Fps is (10 sec: 47454.6, 60 sec: 44776.1, 300 sec: 43568.4). Total num frames: 1716609024. Throughput: 0: 11031.6. Samples: 179150250. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:08,989][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:09,746][626795] Updated weights for policy 0, policy_version 209552 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:11,460][626795] Updated weights for policy 0, policy_version 209562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:13,108][626795] Updated weights for policy 0, policy_version 209572 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:13,975][24592] Fps is (10 sec: 48336.0, 60 sec: 45329.1, 300 sec: 43570.3). Total num frames: 1716854784. Throughput: 0: 11037.9. Samples: 179185986. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:13,976][24592] Avg episode reward: [(0, '5.010')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:14,863][626795] Updated weights for policy 0, policy_version 209582 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:19,327][24592] Fps is (10 sec: 34073.4, 60 sec: 42757.7, 300 sec: 43074.8). Total num frames: 1716961280. Throughput: 0: 10144.0. Samples: 179222622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:19,327][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:19,502][626795] Updated weights for policy 0, policy_version 209592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:21,280][626795] Updated weights for policy 0, policy_version 209602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:23,047][626795] Updated weights for policy 0, policy_version 209612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:23,975][24592] Fps is (10 sec: 32768.0, 60 sec: 42734.9, 300 sec: 43042.7). Total num frames: 1717182464. Throughput: 0: 10202.0. Samples: 179293974. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:23,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:24,705][626795] Updated weights for policy 0, policy_version 209622 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:26,465][626795] Updated weights for policy 0, policy_version 209632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:28,201][626795] Updated weights for policy 0, policy_version 209642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:28,975][24592] Fps is (10 sec: 48394.4, 60 sec: 42735.1, 300 sec: 43604.2). Total num frames: 1717428224. Throughput: 0: 11010.5. Samples: 179329308. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:28,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:29,811][626795] Updated weights for policy 0, policy_version 209652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:31,502][626795] Updated weights for policy 0, policy_version 209662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:33,236][626795] Updated weights for policy 0, policy_version 209672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:33,976][24592] Fps is (10 sec: 49151.0, 60 sec: 42871.3, 300 sec: 43625.8). Total num frames: 1717673984. Throughput: 0: 11100.3. Samples: 179402082. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:33,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:34,965][626795] Updated weights for policy 0, policy_version 209682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:36,600][626795] Updated weights for policy 0, policy_version 209692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:38,321][626795] Updated weights for policy 0, policy_version 209702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:38,976][24592] Fps is (10 sec: 48330.4, 60 sec: 42734.6, 300 sec: 43625.8). Total num frames: 1717911552. Throughput: 0: 11171.9. Samples: 179474406. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:38,976][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:40,080][626795] Updated weights for policy 0, policy_version 209712 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:41,720][626795] Updated weights for policy 0, policy_version 209722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:43,399][626795] Updated weights for policy 0, policy_version 209732 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:43,975][24592] Fps is (10 sec: 47514.5, 60 sec: 45264.4, 300 sec: 43598.1). Total num frames: 1718149120. Throughput: 0: 11197.9. Samples: 179510316. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:43,976][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:45,109][626795] Updated weights for policy 0, policy_version 209742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:46,871][626795] Updated weights for policy 0, policy_version 209752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:48,488][626795] Updated weights for policy 0, policy_version 209762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:48,976][24592] Fps is (10 sec: 47512.7, 60 sec: 45328.5, 300 sec: 43598.0). Total num frames: 1718386688. Throughput: 0: 11224.4. Samples: 179582838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:48,977][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:50,196][626795] Updated weights for policy 0, policy_version 209772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:55,007][24592] Fps is (10 sec: 32673.0, 60 sec: 42817.6, 300 sec: 43031.1). Total num frames: 1718509568. Throughput: 0: 10177.7. Samples: 179618616. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:55,009][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:55,439][626795] Updated weights for policy 0, policy_version 209782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:57,252][626795] Updated weights for policy 0, policy_version 209792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:58,976][24592] Fps is (10 sec: 30312.1, 60 sec: 42598.8, 300 sec: 42987.1). Total num frames: 1718689792. Throughput: 0: 10256.6. Samples: 179647536. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 12:59:58,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 12:59:59,111][626795] Updated weights for policy 0, policy_version 209802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:01,016][626795] Updated weights for policy 0, policy_version 209812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:02,803][626795] Updated weights for policy 0, policy_version 209822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:03,976][24592] Fps is (10 sec: 44758.2, 60 sec: 42325.5, 300 sec: 43471.0). Total num frames: 1718910976. Throughput: 0: 11014.3. Samples: 179714400. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:03,979][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:04,627][626795] Updated weights for policy 0, policy_version 209832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:06,313][626795] Updated weights for policy 0, policy_version 209842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:08,061][626795] Updated weights for policy 0, policy_version 209852 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:08,975][24592] Fps is (10 sec: 45875.6, 60 sec: 42334.6, 300 sec: 43487.0). Total num frames: 1719148544. Throughput: 0: 10906.7. Samples: 179784774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:08,977][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:09,694][626795] Updated weights for policy 0, policy_version 209862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:11,450][626795] Updated weights for policy 0, policy_version 209872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:13,164][626795] Updated weights for policy 0, policy_version 209882 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:13,975][24592] Fps is (10 sec: 47515.4, 60 sec: 42188.8, 300 sec: 43487.1). Total num frames: 1719386112. Throughput: 0: 10935.6. Samples: 179821410. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:13,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:14,861][626795] Updated weights for policy 0, policy_version 209892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:16,528][626795] Updated weights for policy 0, policy_version 209902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:18,245][626795] Updated weights for policy 0, policy_version 209912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:18,976][24592] Fps is (10 sec: 48330.8, 60 sec: 44771.7, 300 sec: 43487.1). Total num frames: 1719631872. Throughput: 0: 10906.3. Samples: 179892870. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:18,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:19,927][626795] Updated weights for policy 0, policy_version 209922 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:21,614][626795] Updated weights for policy 0, policy_version 209932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:23,346][626795] Updated weights for policy 0, policy_version 209942 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:23,976][24592] Fps is (10 sec: 48331.3, 60 sec: 44782.7, 300 sec: 43487.0). Total num frames: 1719869440. Throughput: 0: 10910.7. Samples: 179965386. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:23,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:25,070][626795] Updated weights for policy 0, policy_version 209952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:26,718][626795] Updated weights for policy 0, policy_version 209962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:30,095][24592] Fps is (10 sec: 35363.1, 60 sec: 42488.1, 300 sec: 43045.9). Total num frames: 1720025088. Throughput: 0: 10656.3. Samples: 180001782. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:30,096][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:31,361][626795] Updated weights for policy 0, policy_version 209972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:33,044][626795] Updated weights for policy 0, policy_version 209982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:33,975][24592] Fps is (10 sec: 34407.2, 60 sec: 42325.4, 300 sec: 43014.9). Total num frames: 1720213504. Throughput: 0: 10143.5. Samples: 180039288. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:33,977][24592] Avg episode reward: [(0, '4.895')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:34,698][626795] Updated weights for policy 0, policy_version 209992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:36,454][626795] Updated weights for policy 0, policy_version 210002 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:38,155][626795] Updated weights for policy 0, policy_version 210012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:38,975][24592] Fps is (10 sec: 47970.6, 60 sec: 42325.7, 300 sec: 43508.8). Total num frames: 1720451072. Throughput: 0: 11203.8. Samples: 180111222. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:38,976][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:39,808][626795] Updated weights for policy 0, policy_version 210022 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:41,622][626795] Updated weights for policy 0, policy_version 210032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:43,294][626795] Updated weights for policy 0, policy_version 210042 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:43,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42461.8, 300 sec: 43598.1). Total num frames: 1720696832. Throughput: 0: 11097.9. Samples: 180146940. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:43,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:44,971][626795] Updated weights for policy 0, policy_version 210052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:46,643][626795] Updated weights for policy 0, policy_version 210062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:48,277][626795] Updated weights for policy 0, policy_version 210072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:48,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42462.4, 300 sec: 43598.1). Total num frames: 1720934400. Throughput: 0: 11228.9. Samples: 180219696. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:48,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:50,147][626795] Updated weights for policy 0, policy_version 210082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:51,890][626795] Updated weights for policy 0, policy_version 210092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:53,673][626795] Updated weights for policy 0, policy_version 210102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:53,975][24592] Fps is (10 sec: 46694.7, 60 sec: 45011.0, 300 sec: 43598.1). Total num frames: 1721163776. Throughput: 0: 11208.7. Samples: 180289164. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:53,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:55,403][626795] Updated weights for policy 0, policy_version 210112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:57,087][626795] Updated weights for policy 0, policy_version 210122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:00:58,698][626795] Updated weights for policy 0, policy_version 210132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:58,975][24592] Fps is (10 sec: 46694.3, 60 sec: 45192.6, 300 sec: 43570.4). Total num frames: 1721401344. Throughput: 0: 11201.5. Samples: 180325476. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:00:58,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:00,568][626795] Updated weights for policy 0, policy_version 210142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:02,192][626795] Updated weights for policy 0, policy_version 210152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:05,819][24592] Fps is (10 sec: 33892.4, 60 sec: 42918.3, 300 sec: 43051.4). Total num frames: 1721565184. Throughput: 0: 9999.1. Samples: 180361260. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:05,823][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:05,828][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000210152_1721565184.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:05,903][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000208875_1711104000.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:07,554][626795] Updated weights for policy 0, policy_version 210162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:08,975][24592] Fps is (10 sec: 30310.4, 60 sec: 42598.4, 300 sec: 42959.4). Total num frames: 1721704448. Throughput: 0: 10192.9. Samples: 180424062. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:08,976][24592] Avg episode reward: [(0, '4.359')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:09,479][626795] Updated weights for policy 0, policy_version 210172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:11,397][626795] Updated weights for policy 0, policy_version 210182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:12,996][626795] Updated weights for policy 0, policy_version 210192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:13,976][24592] Fps is (10 sec: 45194.4, 60 sec: 42461.6, 300 sec: 42931.6). Total num frames: 1721933824. Throughput: 0: 10379.6. Samples: 180457242. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:13,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:14,730][626795] Updated weights for policy 0, policy_version 210202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:16,373][626795] Updated weights for policy 0, policy_version 210212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:18,101][626795] Updated weights for policy 0, policy_version 210222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:18,975][24592] Fps is (10 sec: 46693.7, 60 sec: 42325.5, 300 sec: 43431.7). Total num frames: 1722171392. Throughput: 0: 10903.2. Samples: 180529932. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:18,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:19,768][626795] Updated weights for policy 0, policy_version 210232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:21,484][626795] Updated weights for policy 0, policy_version 210242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:23,168][626795] Updated weights for policy 0, policy_version 210252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:23,975][24592] Fps is (10 sec: 49153.6, 60 sec: 42598.6, 300 sec: 43487.0). Total num frames: 1722425344. Throughput: 0: 10930.3. Samples: 180603084. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:23,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:24,877][626795] Updated weights for policy 0, policy_version 210262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:26,517][626795] Updated weights for policy 0, policy_version 210272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:28,241][626795] Updated weights for policy 0, policy_version 210282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:28,976][24592] Fps is (10 sec: 49151.9, 60 sec: 44799.7, 300 sec: 43514.8). Total num frames: 1722662912. Throughput: 0: 10931.3. Samples: 180638850. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:28,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:29,991][626795] Updated weights for policy 0, policy_version 210292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:31,707][626795] Updated weights for policy 0, policy_version 210302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:33,441][626795] Updated weights for policy 0, policy_version 210312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:33,975][24592] Fps is (10 sec: 47513.6, 60 sec: 44783.0, 300 sec: 43487.0). Total num frames: 1722900480. Throughput: 0: 10916.7. Samples: 180710946. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:33,976][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:35,118][626795] Updated weights for policy 0, policy_version 210322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:36,810][626795] Updated weights for policy 0, policy_version 210332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:41,130][24592] Fps is (10 sec: 35046.1, 60 sec: 42439.3, 300 sec: 43006.2). Total num frames: 1723088896. Throughput: 0: 9693.2. Samples: 180746250. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:41,131][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:41,677][626795] Updated weights for policy 0, policy_version 210342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:43,415][626795] Updated weights for policy 0, policy_version 210352 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:43,976][24592] Fps is (10 sec: 31947.4, 60 sec: 42052.0, 300 sec: 42931.6). Total num frames: 1723219968. Throughput: 0: 10123.0. Samples: 180781014. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:43,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:45,163][626795] Updated weights for policy 0, policy_version 210362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:47,035][626795] Updated weights for policy 0, policy_version 210372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:48,631][626795] Updated weights for policy 0, policy_version 210382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:48,975][24592] Fps is (10 sec: 46991.6, 60 sec: 42052.3, 300 sec: 42931.6). Total num frames: 1723457536. Throughput: 0: 11331.9. Samples: 180850302. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:48,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:50,294][626795] Updated weights for policy 0, policy_version 210392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:52,009][626795] Updated weights for policy 0, policy_version 210402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:53,775][626795] Updated weights for policy 0, policy_version 210412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:53,976][24592] Fps is (10 sec: 48332.9, 60 sec: 42325.0, 300 sec: 43459.2). Total num frames: 1723703296. Throughput: 0: 11091.4. Samples: 180923178. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:53,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:55,499][626795] Updated weights for policy 0, policy_version 210422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:57,220][626795] Updated weights for policy 0, policy_version 210432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:01:58,907][626795] Updated weights for policy 0, policy_version 210442 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:58,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42325.3, 300 sec: 43514.8). Total num frames: 1723940864. Throughput: 0: 11141.5. Samples: 180958608. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:01:58,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:00,598][626795] Updated weights for policy 0, policy_version 210452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:02,301][626795] Updated weights for policy 0, policy_version 210462 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:03,975][24592] Fps is (10 sec: 47515.8, 60 sec: 44934.8, 300 sec: 43514.8). Total num frames: 1724178432. Throughput: 0: 11141.9. Samples: 181031316. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:03,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:04,076][626795] Updated weights for policy 0, policy_version 210472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:05,589][626795] Updated weights for policy 0, policy_version 210482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:07,393][626795] Updated weights for policy 0, policy_version 210492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:08,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45329.1, 300 sec: 43542.6). Total num frames: 1724424192. Throughput: 0: 11110.7. Samples: 181103064. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:08,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:09,146][626795] Updated weights for policy 0, policy_version 210502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:10,904][626795] Updated weights for policy 0, policy_version 210512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:12,501][626795] Updated weights for policy 0, policy_version 210522 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:16,638][24592] Fps is (10 sec: 35582.5, 60 sec: 43011.2, 300 sec: 43015.5). Total num frames: 1724628992. Throughput: 0: 10501.7. Samples: 181139382. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:16,639][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:17,708][626795] Updated weights for policy 0, policy_version 210532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:18,976][24592] Fps is (10 sec: 30308.3, 60 sec: 42598.0, 300 sec: 42903.8). Total num frames: 1724727296. Throughput: 0: 10210.6. Samples: 181170432. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:18,978][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:19,397][626795] Updated weights for policy 0, policy_version 210542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:21,186][626795] Updated weights for policy 0, policy_version 210552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:22,903][626795] Updated weights for policy 0, policy_version 210562 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:23,975][24592] Fps is (10 sec: 46890.3, 60 sec: 42461.9, 300 sec: 42931.7). Total num frames: 1724973056. Throughput: 0: 11536.9. Samples: 181240548. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:23,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:24,594][626795] Updated weights for policy 0, policy_version 210572 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:26,307][626795] Updated weights for policy 0, policy_version 210582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:27,954][626795] Updated weights for policy 0, policy_version 210592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:28,975][24592] Fps is (10 sec: 48336.6, 60 sec: 42462.0, 300 sec: 43431.5). Total num frames: 1725210624. Throughput: 0: 11020.7. Samples: 181276938. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:28,976][24592] Avg episode reward: [(0, '5.100')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:29,724][626795] Updated weights for policy 0, policy_version 210602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:31,370][626795] Updated weights for policy 0, policy_version 210612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:33,159][626795] Updated weights for policy 0, policy_version 210622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:33,976][24592] Fps is (10 sec: 49150.0, 60 sec: 42734.7, 300 sec: 43542.5). Total num frames: 1725464576. Throughput: 0: 11088.6. Samples: 181349292. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:33,977][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:34,812][626795] Updated weights for policy 0, policy_version 210632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:36,552][626795] Updated weights for policy 0, policy_version 210642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:38,216][626795] Updated weights for policy 0, policy_version 210652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:38,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45035.2, 300 sec: 43487.0). Total num frames: 1725693952. Throughput: 0: 11071.6. Samples: 181421394. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:38,976][24592] Avg episode reward: [(0, '5.052')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:39,947][626795] Updated weights for policy 0, policy_version 210662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:41,618][626795] Updated weights for policy 0, policy_version 210672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:43,334][626795] Updated weights for policy 0, policy_version 210682 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:43,975][24592] Fps is (10 sec: 46696.2, 60 sec: 45192.9, 300 sec: 43487.1). Total num frames: 1725931520. Throughput: 0: 11080.0. Samples: 181457208. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:43,976][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:44,917][626795] Updated weights for policy 0, policy_version 210692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:46,750][626795] Updated weights for policy 0, policy_version 210702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:48,512][626795] Updated weights for policy 0, policy_version 210712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:52,127][24592] Fps is (10 sec: 36126.2, 60 sec: 42936.8, 300 sec: 42999.8). Total num frames: 1726169088. Throughput: 0: 10353.7. Samples: 181529868. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:52,129][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:53,551][626795] Updated weights for policy 0, policy_version 210722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:53,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42325.7, 300 sec: 42903.9). Total num frames: 1726242816. Throughput: 0: 10161.5. Samples: 181560330. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:53,976][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:55,299][626795] Updated weights for policy 0, policy_version 210732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:57,009][626795] Updated weights for policy 0, policy_version 210742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:02:58,700][626795] Updated weights for policy 0, policy_version 210752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:58,975][24592] Fps is (10 sec: 46654.8, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1726488576. Throughput: 0: 10774.0. Samples: 181595526. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:02:58,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:00,451][626795] Updated weights for policy 0, policy_version 210762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:02,163][626795] Updated weights for policy 0, policy_version 210772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:03,826][626795] Updated weights for policy 0, policy_version 210782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:03,976][24592] Fps is (10 sec: 48331.5, 60 sec: 42461.7, 300 sec: 43404.3). Total num frames: 1726726144. Throughput: 0: 11048.9. Samples: 181667628. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:03,976][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:04,010][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000210783_1726734336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:04,089][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000209518_1716371456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:05,659][626795] Updated weights for policy 0, policy_version 210792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:07,353][626795] Updated weights for policy 0, policy_version 210802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:08,976][24592] Fps is (10 sec: 47509.3, 60 sec: 42324.7, 300 sec: 43486.9). Total num frames: 1726963712. Throughput: 0: 11088.4. Samples: 181739538. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:08,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:08,983][626795] Updated weights for policy 0, policy_version 210812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:10,706][626795] Updated weights for policy 0, policy_version 210822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:12,412][626795] Updated weights for policy 0, policy_version 210832 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:13,975][24592] Fps is (10 sec: 48333.7, 60 sec: 45004.9, 300 sec: 43487.0). Total num frames: 1727209472. Throughput: 0: 11073.6. Samples: 181775250. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:13,976][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:14,163][626795] Updated weights for policy 0, policy_version 210842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:15,855][626795] Updated weights for policy 0, policy_version 210852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:17,490][626795] Updated weights for policy 0, policy_version 210862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:18,975][24592] Fps is (10 sec: 48337.3, 60 sec: 45329.6, 300 sec: 43487.0). Total num frames: 1727447040. Throughput: 0: 11081.6. Samples: 181847958. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:18,978][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:19,299][626795] Updated weights for policy 0, policy_version 210872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:20,917][626795] Updated weights for policy 0, policy_version 210882 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:22,679][626795] Updated weights for policy 0, policy_version 210892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:23,975][24592] Fps is (10 sec: 47513.9, 60 sec: 45192.5, 300 sec: 43459.3). Total num frames: 1727684608. Throughput: 0: 11058.3. Samples: 181919016. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:23,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:27,764][626795] Updated weights for policy 0, policy_version 210902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:28,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42461.8, 300 sec: 42903.9). Total num frames: 1727758336. Throughput: 0: 10445.2. Samples: 181927242. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:28,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:29,682][626795] Updated weights for policy 0, policy_version 210912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:31,349][626795] Updated weights for policy 0, policy_version 210922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:33,005][626795] Updated weights for policy 0, policy_version 210932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:33,975][24592] Fps is (10 sec: 31129.4, 60 sec: 42189.0, 300 sec: 42876.1). Total num frames: 1727995904. Throughput: 0: 10873.2. Samples: 181984890. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:33,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:34,718][626795] Updated weights for policy 0, policy_version 210942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:36,486][626795] Updated weights for policy 0, policy_version 210952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:38,094][626795] Updated weights for policy 0, policy_version 210962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:38,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42325.3, 300 sec: 43390.0). Total num frames: 1728233472. Throughput: 0: 11038.7. Samples: 182057070. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:38,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:39,816][626795] Updated weights for policy 0, policy_version 210972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:41,523][626795] Updated weights for policy 0, policy_version 210982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:43,185][626795] Updated weights for policy 0, policy_version 210992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:43,976][24592] Fps is (10 sec: 48330.2, 60 sec: 42461.5, 300 sec: 43431.4). Total num frames: 1728479232. Throughput: 0: 11061.7. Samples: 182093310. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:43,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:45,030][626795] Updated weights for policy 0, policy_version 211002 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:46,595][626795] Updated weights for policy 0, policy_version 211012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:48,340][626795] Updated weights for policy 0, policy_version 211022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:48,976][24592] Fps is (10 sec: 48332.0, 60 sec: 44816.2, 300 sec: 43459.2). Total num frames: 1728716800. Throughput: 0: 11057.5. Samples: 182165214. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:48,976][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:49,942][626795] Updated weights for policy 0, policy_version 211032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:51,765][626795] Updated weights for policy 0, policy_version 211042 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:53,516][626795] Updated weights for policy 0, policy_version 211052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:53,975][24592] Fps is (10 sec: 47516.4, 60 sec: 45192.5, 300 sec: 43459.4). Total num frames: 1728954368. Throughput: 0: 11050.0. Samples: 182236776. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:53,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:55,203][626795] Updated weights for policy 0, policy_version 211062 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:56,945][626795] Updated weights for policy 0, policy_version 211072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:03:58,656][626795] Updated weights for policy 0, policy_version 211082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:58,975][24592] Fps is (10 sec: 47514.1, 60 sec: 45056.0, 300 sec: 43459.3). Total num frames: 1729191936. Throughput: 0: 11059.2. Samples: 182272914. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:03:58,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:03,756][626795] Updated weights for policy 0, policy_version 211092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:03,976][24592] Fps is (10 sec: 31129.5, 60 sec: 42325.5, 300 sec: 42905.8). Total num frames: 1729265664. Throughput: 0: 10296.5. Samples: 182311302. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:03,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:05,640][626795] Updated weights for policy 0, policy_version 211102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:07,341][626795] Updated weights for policy 0, policy_version 211112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:08,975][24592] Fps is (10 sec: 31129.8, 60 sec: 42326.0, 300 sec: 42876.1). Total num frames: 1729503232. Throughput: 0: 10115.9. Samples: 182374230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:08,976][626795] Updated weights for policy 0, policy_version 211122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:08,977][24592] Avg episode reward: [(0, '5.015')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:10,660][626795] Updated weights for policy 0, policy_version 211132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:12,325][626795] Updated weights for policy 0, policy_version 211142 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:13,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42325.4, 300 sec: 43399.9). Total num frames: 1729748992. Throughput: 0: 10730.7. Samples: 182410122. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:13,976][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:14,105][626795] Updated weights for policy 0, policy_version 211152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:15,776][626795] Updated weights for policy 0, policy_version 211162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:17,575][626795] Updated weights for policy 0, policy_version 211172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:18,976][24592] Fps is (10 sec: 49149.0, 60 sec: 42461.4, 300 sec: 43431.4). Total num frames: 1729994752. Throughput: 0: 11060.9. Samples: 182482638. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:18,978][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:19,142][626795] Updated weights for policy 0, policy_version 211182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:20,868][626795] Updated weights for policy 0, policy_version 211192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:22,597][626795] Updated weights for policy 0, policy_version 211202 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:23,976][24592] Fps is (10 sec: 47511.6, 60 sec: 42325.0, 300 sec: 43375.9). Total num frames: 1730224128. Throughput: 0: 11082.0. Samples: 182555766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:23,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:24,258][626795] Updated weights for policy 0, policy_version 211212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:26,046][626795] Updated weights for policy 0, policy_version 211222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:27,568][626795] Updated weights for policy 0, policy_version 211232 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:28,975][24592] Fps is (10 sec: 47516.5, 60 sec: 45192.5, 300 sec: 43376.0). Total num frames: 1730469888. Throughput: 0: 11070.1. Samples: 182591460. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:28,978][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:29,391][626795] Updated weights for policy 0, policy_version 211242 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:31,033][626795] Updated weights for policy 0, policy_version 211252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:32,655][626795] Updated weights for policy 0, policy_version 211262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:33,975][24592] Fps is (10 sec: 49153.8, 60 sec: 45329.1, 300 sec: 43403.8). Total num frames: 1730715648. Throughput: 0: 11084.6. Samples: 182664018. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:33,977][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:34,518][626795] Updated weights for policy 0, policy_version 211272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:38,975][24592] Fps is (10 sec: 31949.1, 60 sec: 42598.5, 300 sec: 42848.3). Total num frames: 1730789376. Throughput: 0: 10206.8. Samples: 182696082. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:38,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:39,687][626795] Updated weights for policy 0, policy_version 211282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:41,479][626795] Updated weights for policy 0, policy_version 211292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:43,116][626795] Updated weights for policy 0, policy_version 211302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:43,975][24592] Fps is (10 sec: 30310.5, 60 sec: 42325.7, 300 sec: 42820.7). Total num frames: 1731018752. Throughput: 0: 10136.1. Samples: 182729040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:43,976][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:44,836][626795] Updated weights for policy 0, policy_version 211312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:46,525][626795] Updated weights for policy 0, policy_version 211322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:48,261][626795] Updated weights for policy 0, policy_version 211332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:48,975][24592] Fps is (10 sec: 47513.1, 60 sec: 42462.0, 300 sec: 43388.9). Total num frames: 1731264512. Throughput: 0: 10874.7. Samples: 182800662. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:48,977][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:50,020][626795] Updated weights for policy 0, policy_version 211342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:51,763][626795] Updated weights for policy 0, policy_version 211352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:53,485][626795] Updated weights for policy 0, policy_version 211362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:53,977][24592] Fps is (10 sec: 47507.1, 60 sec: 42324.4, 300 sec: 43403.5). Total num frames: 1731493888. Throughput: 0: 11065.4. Samples: 182872188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:53,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:55,211][626795] Updated weights for policy 0, policy_version 211372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:56,966][626795] Updated weights for policy 0, policy_version 211382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:04:58,593][626795] Updated weights for policy 0, policy_version 211392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:58,975][24592] Fps is (10 sec: 46694.3, 60 sec: 42325.4, 300 sec: 43459.3). Total num frames: 1731731456. Throughput: 0: 11046.5. Samples: 182907216. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:04:58,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:00,420][626795] Updated weights for policy 0, policy_version 211402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:02,033][626795] Updated weights for policy 0, policy_version 211412 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:03,754][626795] Updated weights for policy 0, policy_version 211422 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:03,975][24592] Fps is (10 sec: 48338.9, 60 sec: 45192.5, 300 sec: 43487.0). Total num frames: 1731977216. Throughput: 0: 11034.7. Samples: 182979192. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:03,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000211423_1731977216.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000210152_1721565184.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:05,529][626795] Updated weights for policy 0, policy_version 211432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:07,228][626795] Updated weights for policy 0, policy_version 211442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:08,946][626795] Updated weights for policy 0, policy_version 211452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:08,976][24592] Fps is (10 sec: 48331.3, 60 sec: 45192.3, 300 sec: 43487.0). Total num frames: 1732214784. Throughput: 0: 11005.8. Samples: 183051024. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:08,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:10,807][626795] Updated weights for policy 0, policy_version 211462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:14,080][24592] Fps is (10 sec: 31618.2, 60 sec: 42387.9, 300 sec: 42916.5). Total num frames: 1732296704. Throughput: 0: 10188.8. Samples: 183051024. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:14,081][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:15,826][626795] Updated weights for policy 0, policy_version 211472 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:17,571][626795] Updated weights for policy 0, policy_version 211482 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:18,975][24592] Fps is (10 sec: 31130.6, 60 sec: 42189.2, 300 sec: 42903.9). Total num frames: 1732526080. Throughput: 0: 10048.8. Samples: 183116214. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:18,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:19,372][626795] Updated weights for policy 0, policy_version 211492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:21,058][626795] Updated weights for policy 0, policy_version 211502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:22,728][626795] Updated weights for policy 0, policy_version 211512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:23,975][24592] Fps is (10 sec: 47188.3, 60 sec: 42325.6, 300 sec: 43346.1). Total num frames: 1732763648. Throughput: 0: 10946.5. Samples: 183188676. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:23,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:24,393][626795] Updated weights for policy 0, policy_version 211522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:26,124][626795] Updated weights for policy 0, policy_version 211532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:27,853][626795] Updated weights for policy 0, policy_version 211542 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:28,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42188.8, 300 sec: 43348.2). Total num frames: 1733001216. Throughput: 0: 11007.6. Samples: 183224382. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:28,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:29,576][626795] Updated weights for policy 0, policy_version 211552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:31,204][626795] Updated weights for policy 0, policy_version 211562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:32,975][626795] Updated weights for policy 0, policy_version 211572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:33,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42052.3, 300 sec: 43348.2). Total num frames: 1733238784. Throughput: 0: 11014.1. Samples: 183296298. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:33,978][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:34,697][626795] Updated weights for policy 0, policy_version 211582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:36,373][626795] Updated weights for policy 0, policy_version 211592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:38,163][626795] Updated weights for policy 0, policy_version 211602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:38,976][24592] Fps is (10 sec: 48331.5, 60 sec: 44919.2, 300 sec: 43348.2). Total num frames: 1733484544. Throughput: 0: 11021.2. Samples: 183368130. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:38,978][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:39,757][626795] Updated weights for policy 0, policy_version 211612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:41,553][626795] Updated weights for policy 0, policy_version 211622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:41,561][626772] Signal inference workers to stop experience collection... (2500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:41,562][626772] Signal inference workers to resume experience collection... (2500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:41,574][626795] InferenceWorker_p0-w0: stopping experience collection (2500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:41,578][626795] InferenceWorker_p0-w0: resuming experience collection (2500 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:43,246][626795] Updated weights for policy 0, policy_version 211632 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:43,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45056.0, 300 sec: 43348.2). Total num frames: 1733722112. Throughput: 0: 11053.7. Samples: 183404634. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:43,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:44,989][626795] Updated weights for policy 0, policy_version 211642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:49,445][24592] Fps is (10 sec: 32862.4, 60 sec: 42402.7, 300 sec: 42863.3). Total num frames: 1733828608. Throughput: 0: 10123.7. Samples: 183439518. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:49,446][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:50,001][626795] Updated weights for policy 0, policy_version 211652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:51,855][626795] Updated weights for policy 0, policy_version 211662 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:53,563][626795] Updated weights for policy 0, policy_version 211672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:53,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42326.3, 300 sec: 42820.6). Total num frames: 1734033408. Throughput: 0: 10116.9. Samples: 183506280. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:53,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:55,196][626795] Updated weights for policy 0, policy_version 211682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:56,968][626795] Updated weights for policy 0, policy_version 211692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:05:58,654][626795] Updated weights for policy 0, policy_version 211702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:58,975][24592] Fps is (10 sec: 46418.8, 60 sec: 42325.3, 300 sec: 43341.3). Total num frames: 1734270976. Throughput: 0: 10937.6. Samples: 183542070. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:05:58,976][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:00,413][626795] Updated weights for policy 0, policy_version 211712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:02,096][626795] Updated weights for policy 0, policy_version 211722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:03,749][626795] Updated weights for policy 0, policy_version 211732 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:03,976][24592] Fps is (10 sec: 48330.5, 60 sec: 42325.1, 300 sec: 43431.4). Total num frames: 1734516736. Throughput: 0: 11060.3. Samples: 183613932. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:03,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:05,612][626795] Updated weights for policy 0, policy_version 211742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:07,251][626795] Updated weights for policy 0, policy_version 211752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:08,932][626795] Updated weights for policy 0, policy_version 211762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42325.5, 300 sec: 43459.3). Total num frames: 1734754304. Throughput: 0: 11043.3. Samples: 183685626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:08,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:10,566][626795] Updated weights for policy 0, policy_version 211772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:12,356][626795] Updated weights for policy 0, policy_version 211782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:13,934][626795] Updated weights for policy 0, policy_version 211792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:13,975][24592] Fps is (10 sec: 48335.2, 60 sec: 45134.7, 300 sec: 43487.0). Total num frames: 1735000064. Throughput: 0: 11063.3. Samples: 183722232. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:13,976][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:15,592][626795] Updated weights for policy 0, policy_version 211802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:17,334][626795] Updated weights for policy 0, policy_version 211812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:18,976][24592] Fps is (10 sec: 48331.5, 60 sec: 45192.3, 300 sec: 43431.4). Total num frames: 1735237632. Throughput: 0: 11082.7. Samples: 183795024. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:18,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:19,193][626795] Updated weights for policy 0, policy_version 211822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:20,873][626795] Updated weights for policy 0, policy_version 211832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:24,905][24592] Fps is (10 sec: 32979.5, 60 sec: 42620.8, 300 sec: 42907.5). Total num frames: 1735360512. Throughput: 0: 10041.7. Samples: 183829338. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:24,908][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:26,103][626795] Updated weights for policy 0, policy_version 211842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:27,836][626795] Updated weights for policy 0, policy_version 211852 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:28,976][24592] Fps is (10 sec: 29491.0, 60 sec: 42188.5, 300 sec: 42820.5). Total num frames: 1735532544. Throughput: 0: 10124.1. Samples: 183860220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:28,981][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:29,711][626795] Updated weights for policy 0, policy_version 211862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:31,454][626795] Updated weights for policy 0, policy_version 211872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:33,120][626795] Updated weights for policy 0, policy_version 211882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:33,976][24592] Fps is (10 sec: 45156.3, 60 sec: 42188.7, 300 sec: 43303.5). Total num frames: 1735770112. Throughput: 0: 10989.8. Samples: 183928896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:33,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:34,930][626795] Updated weights for policy 0, policy_version 211892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:36,621][626795] Updated weights for policy 0, policy_version 211902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:38,312][626795] Updated weights for policy 0, policy_version 211912 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:38,976][24592] Fps is (10 sec: 47513.3, 60 sec: 42052.1, 300 sec: 43348.2). Total num frames: 1736007680. Throughput: 0: 10978.3. Samples: 184000308. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:38,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:40,083][626795] Updated weights for policy 0, policy_version 211922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:41,823][626795] Updated weights for policy 0, policy_version 211932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:43,519][626795] Updated weights for policy 0, policy_version 211942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:43,975][24592] Fps is (10 sec: 47514.5, 60 sec: 42052.3, 300 sec: 43348.2). Total num frames: 1736245248. Throughput: 0: 10960.9. Samples: 184035312. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:43,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:45,283][626795] Updated weights for policy 0, policy_version 211952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:46,966][626795] Updated weights for policy 0, policy_version 211962 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:48,711][626795] Updated weights for policy 0, policy_version 211972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:48,975][24592] Fps is (10 sec: 47515.6, 60 sec: 44586.1, 300 sec: 43320.5). Total num frames: 1736482816. Throughput: 0: 10948.8. Samples: 184106622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:48,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:50,523][626795] Updated weights for policy 0, policy_version 211982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:52,228][626795] Updated weights for policy 0, policy_version 211992 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:53,975][24592] Fps is (10 sec: 46694.8, 60 sec: 44646.5, 300 sec: 43292.7). Total num frames: 1736712192. Throughput: 0: 10916.7. Samples: 184176876. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:53,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:54,073][626795] Updated weights for policy 0, policy_version 212002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:06:55,595][626795] Updated weights for policy 0, policy_version 212012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:59,834][24592] Fps is (10 sec: 35458.5, 60 sec: 42670.6, 300 sec: 42890.1). Total num frames: 1736867840. Throughput: 0: 10686.5. Samples: 184212300. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:06:59,835][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:00,201][626795] Updated weights for policy 0, policy_version 212022 (0.2698)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:01,920][626795] Updated weights for policy 0, policy_version 212032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:03,726][626795] Updated weights for policy 0, policy_version 212042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:03,976][24592] Fps is (10 sec: 34405.5, 60 sec: 42325.6, 300 sec: 42820.5). Total num frames: 1737056256. Throughput: 0: 10117.1. Samples: 184250292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:03,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:04,007][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000212044_1737064448.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000210783_1726734336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:05,445][626795] Updated weights for policy 0, policy_version 212052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:07,109][626795] Updated weights for policy 0, policy_version 212062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:08,773][626795] Updated weights for policy 0, policy_version 212072 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:08,975][24592] Fps is (10 sec: 46597.9, 60 sec: 42325.3, 300 sec: 43322.6). Total num frames: 1737293824. Throughput: 0: 11174.5. Samples: 184321806. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:08,977][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:10,627][626795] Updated weights for policy 0, policy_version 212082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:12,249][626795] Updated weights for policy 0, policy_version 212092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:13,955][626795] Updated weights for policy 0, policy_version 212102 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:13,975][24592] Fps is (10 sec: 48333.7, 60 sec: 42325.3, 300 sec: 43431.6). Total num frames: 1737539584. Throughput: 0: 11041.7. Samples: 184357092. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:13,976][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:15,654][626795] Updated weights for policy 0, policy_version 212112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:17,390][626795] Updated weights for policy 0, policy_version 212122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:18,975][24592] Fps is (10 sec: 48333.2, 60 sec: 42325.5, 300 sec: 43403.7). Total num frames: 1737777152. Throughput: 0: 11143.1. Samples: 184430334. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:18,978][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:19,131][626795] Updated weights for policy 0, policy_version 212132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:20,781][626795] Updated weights for policy 0, policy_version 212142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:22,443][626795] Updated weights for policy 0, policy_version 212152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:23,975][24592] Fps is (10 sec: 47513.9, 60 sec: 44932.9, 300 sec: 43403.7). Total num frames: 1738014720. Throughput: 0: 11149.7. Samples: 184502040. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:23,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:24,267][626795] Updated weights for policy 0, policy_version 212162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:25,912][626795] Updated weights for policy 0, policy_version 212172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:27,702][626795] Updated weights for policy 0, policy_version 212182 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:28,975][24592] Fps is (10 sec: 47514.2, 60 sec: 45329.4, 300 sec: 43348.2). Total num frames: 1738252288. Throughput: 0: 11171.0. Samples: 184538004. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:28,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:29,356][626795] Updated weights for policy 0, policy_version 212192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:31,019][626795] Updated weights for policy 0, policy_version 212202 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:35,518][24592] Fps is (10 sec: 33356.3, 60 sec: 42728.7, 300 sec: 42846.4). Total num frames: 1738399744. Throughput: 0: 10033.4. Samples: 184573602. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:35,519][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:36,236][626795] Updated weights for policy 0, policy_version 212212 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:37,848][626795] Updated weights for policy 0, policy_version 212222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:38,976][24592] Fps is (10 sec: 31948.3, 60 sec: 42735.2, 300 sec: 42848.3). Total num frames: 1738571776. Throughput: 0: 10287.3. Samples: 184639806. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:38,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:39,707][626795] Updated weights for policy 0, policy_version 212232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:41,399][626795] Updated weights for policy 0, policy_version 212242 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:43,005][626795] Updated weights for policy 0, policy_version 212252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:43,976][24592] Fps is (10 sec: 48430.4, 60 sec: 42734.8, 300 sec: 43311.1). Total num frames: 1738809344. Throughput: 0: 10504.2. Samples: 184675974. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:44,770][626795] Updated weights for policy 0, policy_version 212262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:46,497][626795] Updated weights for policy 0, policy_version 212272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:48,203][626795] Updated weights for policy 0, policy_version 212282 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:48,975][24592] Fps is (10 sec: 47514.0, 60 sec: 42734.9, 300 sec: 43403.7). Total num frames: 1739046912. Throughput: 0: 11051.4. Samples: 184747602. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:48,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:49,936][626795] Updated weights for policy 0, policy_version 212292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:51,694][626795] Updated weights for policy 0, policy_version 212302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:53,213][626795] Updated weights for policy 0, policy_version 212312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:53,975][24592] Fps is (10 sec: 48333.7, 60 sec: 43007.9, 300 sec: 43403.7). Total num frames: 1739292672. Throughput: 0: 11050.4. Samples: 184819074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:53,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:55,088][626795] Updated weights for policy 0, policy_version 212322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:56,770][626795] Updated weights for policy 0, policy_version 212332 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:07:58,454][626795] Updated weights for policy 0, policy_version 212342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:58,976][24592] Fps is (10 sec: 47512.1, 60 sec: 44878.6, 300 sec: 43375.9). Total num frames: 1739522048. Throughput: 0: 11070.1. Samples: 184855248. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:07:58,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:00,208][626795] Updated weights for policy 0, policy_version 212352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:01,893][626795] Updated weights for policy 0, policy_version 212362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:03,604][626795] Updated weights for policy 0, policy_version 212372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:03,975][24592] Fps is (10 sec: 46694.3, 60 sec: 45056.1, 300 sec: 43376.1). Total num frames: 1739759616. Throughput: 0: 11029.5. Samples: 184926660. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:03,976][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:05,347][626795] Updated weights for policy 0, policy_version 212382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:07,109][626795] Updated weights for policy 0, policy_version 212392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:11,233][24592] Fps is (10 sec: 34084.8, 60 sec: 42501.1, 300 sec: 42826.0). Total num frames: 1739939840. Throughput: 0: 9745.9. Samples: 184962612. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:11,234][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:12,198][626795] Updated weights for policy 0, policy_version 212402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:13,976][24592] Fps is (10 sec: 31129.2, 60 sec: 42188.7, 300 sec: 42792.8). Total num frames: 1740070912. Throughput: 0: 10098.9. Samples: 184992456. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:13,976][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:14,056][626795] Updated weights for policy 0, policy_version 212412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:15,813][626795] Updated weights for policy 0, policy_version 212422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:17,476][626795] Updated weights for policy 0, policy_version 212432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:18,976][24592] Fps is (10 sec: 48671.1, 60 sec: 42325.2, 300 sec: 42820.5). Total num frames: 1740316672. Throughput: 0: 11270.7. Samples: 185063400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:18,978][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:19,189][626795] Updated weights for policy 0, policy_version 212442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:20,942][626795] Updated weights for policy 0, policy_version 212452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:22,542][626795] Updated weights for policy 0, policy_version 212462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:23,975][24592] Fps is (10 sec: 48333.7, 60 sec: 42325.3, 300 sec: 43376.0). Total num frames: 1740554240. Throughput: 0: 11020.2. Samples: 185135712. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:23,976][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:24,342][626795] Updated weights for policy 0, policy_version 212472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:25,998][626795] Updated weights for policy 0, policy_version 212482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:27,726][626795] Updated weights for policy 0, policy_version 212492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:28,975][24592] Fps is (10 sec: 46695.2, 60 sec: 42188.7, 300 sec: 43348.2). Total num frames: 1740783616. Throughput: 0: 10984.3. Samples: 185170266. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:28,977][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:29,471][626795] Updated weights for policy 0, policy_version 212502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:31,270][626795] Updated weights for policy 0, policy_version 212512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:32,827][626795] Updated weights for policy 0, policy_version 212522 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:33,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44983.8, 300 sec: 43375.9). Total num frames: 1741029376. Throughput: 0: 11009.2. Samples: 185243016. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:33,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:34,596][626795] Updated weights for policy 0, policy_version 212532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:36,361][626795] Updated weights for policy 0, policy_version 212542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:37,966][626795] Updated weights for policy 0, policy_version 212552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:38,975][24592] Fps is (10 sec: 49152.1, 60 sec: 45056.1, 300 sec: 43376.0). Total num frames: 1741275136. Throughput: 0: 11022.3. Samples: 185315076. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:38,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:39,653][626795] Updated weights for policy 0, policy_version 212562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:41,479][626795] Updated weights for policy 0, policy_version 212572 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:43,190][626795] Updated weights for policy 0, policy_version 212582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:46,836][24592] Fps is (10 sec: 35670.0, 60 sec: 42614.5, 300 sec: 42876.8). Total num frames: 1741488128. Throughput: 0: 10353.5. Samples: 185350776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:46,837][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:48,303][626795] Updated weights for policy 0, policy_version 212592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:48,976][24592] Fps is (10 sec: 31128.3, 60 sec: 42325.0, 300 sec: 42820.5). Total num frames: 1741586432. Throughput: 0: 10122.7. Samples: 185382186. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:48,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:49,967][626795] Updated weights for policy 0, policy_version 212602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:51,761][626795] Updated weights for policy 0, policy_version 212612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:53,393][626795] Updated weights for policy 0, policy_version 212622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:53,975][24592] Fps is (10 sec: 45899.5, 60 sec: 42052.2, 300 sec: 42792.8). Total num frames: 1741815808. Throughput: 0: 11492.4. Samples: 185453826. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:53,976][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:55,171][626795] Updated weights for policy 0, policy_version 212632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:56,844][626795] Updated weights for policy 0, policy_version 212642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:08:58,487][626795] Updated weights for policy 0, policy_version 212652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:58,975][24592] Fps is (10 sec: 47515.6, 60 sec: 42325.6, 300 sec: 43376.0). Total num frames: 1742061568. Throughput: 0: 11043.5. Samples: 185489412. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:08:58,976][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:00,242][626795] Updated weights for policy 0, policy_version 212662 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:01,849][626795] Updated weights for policy 0, policy_version 212672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:03,577][626795] Updated weights for policy 0, policy_version 212682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:03,975][24592] Fps is (10 sec: 48333.6, 60 sec: 42325.4, 300 sec: 43376.0). Total num frames: 1742299136. Throughput: 0: 11078.7. Samples: 185561940. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:03,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:03,991][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000212684_1742307328.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:04,052][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000211423_1731977216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:05,380][626795] Updated weights for policy 0, policy_version 212692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:07,025][626795] Updated weights for policy 0, policy_version 212702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:08,740][626795] Updated weights for policy 0, policy_version 212712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:08,975][24592] Fps is (10 sec: 48332.6, 60 sec: 45115.2, 300 sec: 43375.9). Total num frames: 1742544896. Throughput: 0: 11066.5. Samples: 185633706. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:08,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:10,469][626795] Updated weights for policy 0, policy_version 212722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:12,167][626795] Updated weights for policy 0, policy_version 212732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:13,827][626795] Updated weights for policy 0, policy_version 212742 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:13,976][24592] Fps is (10 sec: 48331.1, 60 sec: 45192.4, 300 sec: 43348.2). Total num frames: 1742782464. Throughput: 0: 11092.1. Samples: 185669412. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:13,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:15,565][626795] Updated weights for policy 0, policy_version 212752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:17,178][626795] Updated weights for policy 0, policy_version 212762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:18,972][626795] Updated weights for policy 0, policy_version 212772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:18,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45192.6, 300 sec: 43403.8). Total num frames: 1743028224. Throughput: 0: 11079.9. Samples: 185741610. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:18,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:23,975][24592] Fps is (10 sec: 31949.6, 60 sec: 42461.8, 300 sec: 42820.6). Total num frames: 1743101952. Throughput: 0: 10187.3. Samples: 185773506. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:23,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:24,109][626795] Updated weights for policy 0, policy_version 212782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:25,775][626795] Updated weights for policy 0, policy_version 212792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:27,464][626795] Updated weights for policy 0, policy_version 212802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:28,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42598.3, 300 sec: 42792.8). Total num frames: 1743339520. Throughput: 0: 10872.8. Samples: 185808948. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:28,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:29,203][626795] Updated weights for policy 0, policy_version 212812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:30,944][626795] Updated weights for policy 0, policy_version 212822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:32,624][626795] Updated weights for policy 0, policy_version 212832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:33,976][24592] Fps is (10 sec: 47510.6, 60 sec: 42461.4, 300 sec: 43348.1). Total num frames: 1743577088. Throughput: 0: 11073.0. Samples: 185880474. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:33,977][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:34,375][626795] Updated weights for policy 0, policy_version 212842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:35,986][626795] Updated weights for policy 0, policy_version 212852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:37,687][626795] Updated weights for policy 0, policy_version 212862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:38,975][24592] Fps is (10 sec: 48333.3, 60 sec: 42461.8, 300 sec: 43403.7). Total num frames: 1743822848. Throughput: 0: 11114.3. Samples: 185953968. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:38,978][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:39,416][626795] Updated weights for policy 0, policy_version 212872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:41,082][626795] Updated weights for policy 0, policy_version 212882 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:42,830][626795] Updated weights for policy 0, policy_version 212892 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:43,975][24592] Fps is (10 sec: 48335.9, 60 sec: 45018.1, 300 sec: 43375.9). Total num frames: 1744060416. Throughput: 0: 11097.3. Samples: 185988792. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:43,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:44,609][626795] Updated weights for policy 0, policy_version 212902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:46,192][626795] Updated weights for policy 0, policy_version 212912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:47,981][626795] Updated weights for policy 0, policy_version 212922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:48,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45329.4, 300 sec: 43431.7). Total num frames: 1744306176. Throughput: 0: 11105.7. Samples: 186061698. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:48,978][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:49,638][626795] Updated weights for policy 0, policy_version 212932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:51,336][626795] Updated weights for policy 0, policy_version 212942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:53,126][626795] Updated weights for policy 0, policy_version 212952 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:53,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45465.7, 300 sec: 43431.5). Total num frames: 1744543744. Throughput: 0: 11092.5. Samples: 186132870. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:53,976][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:58,277][626795] Updated weights for policy 0, policy_version 212962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:58,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42598.4, 300 sec: 42848.3). Total num frames: 1744617472. Throughput: 0: 10583.8. Samples: 186145680. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:09:58,978][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:09:59,908][626795] Updated weights for policy 0, policy_version 212972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:01,659][626795] Updated weights for policy 0, policy_version 212982 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:03,357][626795] Updated weights for policy 0, policy_version 212992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:03,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42598.4, 300 sec: 42848.4). Total num frames: 1744855040. Throughput: 0: 10174.2. Samples: 186199446. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:03,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:05,138][626795] Updated weights for policy 0, policy_version 213002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:06,764][626795] Updated weights for policy 0, policy_version 213012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:08,422][626795] Updated weights for policy 0, policy_version 213022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:08,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42461.9, 300 sec: 43391.3). Total num frames: 1745092608. Throughput: 0: 11080.4. Samples: 186272124. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:08,977][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:10,124][626795] Updated weights for policy 0, policy_version 213032 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:11,930][626795] Updated weights for policy 0, policy_version 213042 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:13,597][626795] Updated weights for policy 0, policy_version 213052 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:13,977][24592] Fps is (10 sec: 47507.9, 60 sec: 42461.2, 300 sec: 43403.5). Total num frames: 1745330176. Throughput: 0: 11094.8. Samples: 186308226. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:13,980][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:15,310][626795] Updated weights for policy 0, policy_version 213062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:17,118][626795] Updated weights for policy 0, policy_version 213072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:18,672][626795] Updated weights for policy 0, policy_version 213082 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:18,975][24592] Fps is (10 sec: 48331.9, 60 sec: 42461.8, 300 sec: 43431.5). Total num frames: 1745575936. Throughput: 0: 11090.5. Samples: 186379542. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:18,976][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:20,436][626795] Updated weights for policy 0, policy_version 213092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:22,182][626795] Updated weights for policy 0, policy_version 213102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:23,841][626795] Updated weights for policy 0, policy_version 213112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:23,976][24592] Fps is (10 sec: 48337.5, 60 sec: 45192.4, 300 sec: 43431.4). Total num frames: 1745813504. Throughput: 0: 11057.8. Samples: 186451572. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:23,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:25,523][626795] Updated weights for policy 0, policy_version 213122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:27,287][626795] Updated weights for policy 0, policy_version 213132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:28,975][24592] Fps is (10 sec: 47513.8, 60 sec: 45192.5, 300 sec: 43431.5). Total num frames: 1746051072. Throughput: 0: 11086.1. Samples: 186487668. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:28,977][24592] Avg episode reward: [(0, '4.930')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:29,019][626795] Updated weights for policy 0, policy_version 213142 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:33,975][24592] Fps is (10 sec: 31949.3, 60 sec: 42598.8, 300 sec: 42876.1). Total num frames: 1746132992. Throughput: 0: 10450.6. Samples: 186531978. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:33,978][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:34,088][626795] Updated weights for policy 0, policy_version 213152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:35,882][626795] Updated weights for policy 0, policy_version 213162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:37,563][626795] Updated weights for policy 0, policy_version 213172 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:38,975][24592] Fps is (10 sec: 31949.5, 60 sec: 42462.0, 300 sec: 42876.1). Total num frames: 1746370560. Throughput: 0: 10166.7. Samples: 186590370. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:38,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:39,187][626795] Updated weights for policy 0, policy_version 213182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:40,953][626795] Updated weights for policy 0, policy_version 213192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:42,694][626795] Updated weights for policy 0, policy_version 213202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:43,975][24592] Fps is (10 sec: 48333.3, 60 sec: 42598.4, 300 sec: 43417.4). Total num frames: 1746616320. Throughput: 0: 10689.5. Samples: 186626706. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:43,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:44,356][626795] Updated weights for policy 0, policy_version 213212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:46,056][626795] Updated weights for policy 0, policy_version 213222 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:47,789][626795] Updated weights for policy 0, policy_version 213232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:48,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42461.9, 300 sec: 43459.3). Total num frames: 1746853888. Throughput: 0: 11083.1. Samples: 186698184. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:48,976][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:49,387][626795] Updated weights for policy 0, policy_version 213242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:51,211][626795] Updated weights for policy 0, policy_version 213252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:53,032][626795] Updated weights for policy 0, policy_version 213262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:53,975][24592] Fps is (10 sec: 46694.4, 60 sec: 42325.4, 300 sec: 43431.5). Total num frames: 1747083264. Throughput: 0: 11015.7. Samples: 186767832. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:53,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:54,809][626795] Updated weights for policy 0, policy_version 213272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:56,448][626795] Updated weights for policy 0, policy_version 213282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:58,180][626795] Updated weights for policy 0, policy_version 213292 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:58,975][24592] Fps is (10 sec: 46694.4, 60 sec: 45056.0, 300 sec: 43403.8). Total num frames: 1747320832. Throughput: 0: 11010.3. Samples: 186803676. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:10:58,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:10:59,921][626795] Updated weights for policy 0, policy_version 213302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:01,650][626795] Updated weights for policy 0, policy_version 213312 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:03,348][626795] Updated weights for policy 0, policy_version 213322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:03,976][24592] Fps is (10 sec: 47508.8, 60 sec: 45055.3, 300 sec: 43403.6). Total num frames: 1747558400. Throughput: 0: 11027.3. Samples: 186875778. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:03,978][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:04,017][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000213326_1747566592.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:04,069][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000212044_1737064448.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:05,125][626795] Updated weights for policy 0, policy_version 213332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:09,329][24592] Fps is (10 sec: 31649.0, 60 sec: 42348.9, 300 sec: 42824.8). Total num frames: 1747648512. Throughput: 0: 10120.0. Samples: 186910548. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:09,330][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:10,268][626795] Updated weights for policy 0, policy_version 213342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:11,956][626795] Updated weights for policy 0, policy_version 213352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:13,586][626795] Updated weights for policy 0, policy_version 213362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:13,990][24592] Fps is (10 sec: 31088.1, 60 sec: 42316.1, 300 sec: 42818.5). Total num frames: 1747869696. Throughput: 0: 10087.5. Samples: 186941748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:13,991][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:15,366][626795] Updated weights for policy 0, policy_version 213372 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:17,077][626795] Updated weights for policy 0, policy_version 213382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:18,810][626795] Updated weights for policy 0, policy_version 213392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:18,979][24592] Fps is (10 sec: 47538.0, 60 sec: 42186.3, 300 sec: 43345.3). Total num frames: 1748107264. Throughput: 0: 10702.5. Samples: 187013628. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:18,981][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:19,029][626772] Signal inference workers to stop experience collection... (2550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:19,029][626772] Signal inference workers to resume experience collection... (2550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:19,051][626795] InferenceWorker_p0-w0: stopping experience collection (2550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:19,051][626795] InferenceWorker_p0-w0: resuming experience collection (2550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:20,445][626795] Updated weights for policy 0, policy_version 213402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:22,236][626795] Updated weights for policy 0, policy_version 213412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:23,925][626795] Updated weights for policy 0, policy_version 213422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:23,976][24592] Fps is (10 sec: 48400.8, 60 sec: 42325.3, 300 sec: 43459.3). Total num frames: 1748353024. Throughput: 0: 11011.8. Samples: 187085904. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:23,978][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:25,601][626795] Updated weights for policy 0, policy_version 213432 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:27,246][626795] Updated weights for policy 0, policy_version 213442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:28,943][626795] Updated weights for policy 0, policy_version 213452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:28,975][24592] Fps is (10 sec: 49170.5, 60 sec: 42461.9, 300 sec: 43487.1). Total num frames: 1748598784. Throughput: 0: 11000.9. Samples: 187121748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:28,978][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:30,677][626795] Updated weights for policy 0, policy_version 213462 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:32,399][626795] Updated weights for policy 0, policy_version 213472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:33,976][24592] Fps is (10 sec: 48332.3, 60 sec: 45055.8, 300 sec: 43487.0). Total num frames: 1748836352. Throughput: 0: 11037.6. Samples: 187194882. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:33,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:34,043][626795] Updated weights for policy 0, policy_version 213482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:35,813][626795] Updated weights for policy 0, policy_version 213492 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:37,529][626795] Updated weights for policy 0, policy_version 213502 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:38,976][24592] Fps is (10 sec: 47512.0, 60 sec: 45055.7, 300 sec: 43487.0). Total num frames: 1749073920. Throughput: 0: 11095.4. Samples: 187267128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:38,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:39,224][626795] Updated weights for policy 0, policy_version 213512 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:40,875][626795] Updated weights for policy 0, policy_version 213522 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:44,959][24592] Fps is (10 sec: 33564.2, 60 sec: 42448.7, 300 sec: 42982.7). Total num frames: 1749204992. Throughput: 0: 10078.7. Samples: 187267128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:44,960][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:46,096][626795] Updated weights for policy 0, policy_version 213532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:47,766][626795] Updated weights for policy 0, policy_version 213542 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:48,976][24592] Fps is (10 sec: 31949.4, 60 sec: 42325.2, 300 sec: 42987.1). Total num frames: 1749393408. Throughput: 0: 10175.5. Samples: 187333668. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:48,978][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:49,483][626795] Updated weights for policy 0, policy_version 213552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:51,052][626795] Updated weights for policy 0, policy_version 213562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:52,893][626795] Updated weights for policy 0, policy_version 213572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:53,975][24592] Fps is (10 sec: 47245.0, 60 sec: 42461.8, 300 sec: 43391.1). Total num frames: 1749630976. Throughput: 0: 11100.7. Samples: 187406154. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:53,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:54,537][626795] Updated weights for policy 0, policy_version 213582 (0.0046)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:56,268][626795] Updated weights for policy 0, policy_version 213592 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:57,945][626795] Updated weights for policy 0, policy_version 213602 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:58,975][24592] Fps is (10 sec: 48333.7, 60 sec: 42598.4, 300 sec: 43459.3). Total num frames: 1749876736. Throughput: 0: 11113.5. Samples: 187441698. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:11:58,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:11:59,612][626795] Updated weights for policy 0, policy_version 213612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:01,295][626795] Updated weights for policy 0, policy_version 213622 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:03,102][626795] Updated weights for policy 0, policy_version 213632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:03,975][24592] Fps is (10 sec: 47513.8, 60 sec: 42462.6, 300 sec: 43431.5). Total num frames: 1750106112. Throughput: 0: 11117.6. Samples: 187513878. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:03,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:04,734][626795] Updated weights for policy 0, policy_version 213642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:06,519][626795] Updated weights for policy 0, policy_version 213652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:08,167][626795] Updated weights for policy 0, policy_version 213662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:08,975][24592] Fps is (10 sec: 47513.6, 60 sec: 45323.1, 300 sec: 43431.5). Total num frames: 1750351872. Throughput: 0: 11105.4. Samples: 187585644. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:08,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:09,990][626795] Updated weights for policy 0, policy_version 213672 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:11,536][626795] Updated weights for policy 0, policy_version 213682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:13,320][626795] Updated weights for policy 0, policy_version 213692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:13,975][24592] Fps is (10 sec: 48332.4, 60 sec: 45339.9, 300 sec: 43431.5). Total num frames: 1750589440. Throughput: 0: 11113.9. Samples: 187621872. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:13,976][24592] Avg episode reward: [(0, '4.895')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:15,039][626795] Updated weights for policy 0, policy_version 213702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:16,800][626795] Updated weights for policy 0, policy_version 213712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:20,585][24592] Fps is (10 sec: 33869.8, 60 sec: 42817.7, 300 sec: 42919.6). Total num frames: 1750745088. Throughput: 0: 9932.4. Samples: 187657824. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:20,586][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:21,814][626795] Updated weights for policy 0, policy_version 213722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:23,548][626795] Updated weights for policy 0, policy_version 213732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:23,975][24592] Fps is (10 sec: 31948.7, 60 sec: 42598.5, 300 sec: 42903.8). Total num frames: 1750908928. Throughput: 0: 10172.7. Samples: 187724898. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:23,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:25,246][626795] Updated weights for policy 0, policy_version 213742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:26,963][626795] Updated weights for policy 0, policy_version 213752 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:28,754][626795] Updated weights for policy 0, policy_version 213762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:28,976][24592] Fps is (10 sec: 47838.1, 60 sec: 42461.5, 300 sec: 43436.4). Total num frames: 1751146496. Throughput: 0: 11220.6. Samples: 187761024. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:28,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:30,514][626795] Updated weights for policy 0, policy_version 213772 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:32,096][626795] Updated weights for policy 0, policy_version 213782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:33,814][626795] Updated weights for policy 0, policy_version 213792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:33,975][24592] Fps is (10 sec: 48333.5, 60 sec: 42598.7, 300 sec: 43459.3). Total num frames: 1751392256. Throughput: 0: 11082.2. Samples: 187832364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:33,977][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:35,515][626795] Updated weights for policy 0, policy_version 213802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:37,244][626795] Updated weights for policy 0, policy_version 213812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:38,893][626795] Updated weights for policy 0, policy_version 213822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:38,975][24592] Fps is (10 sec: 48335.4, 60 sec: 42598.6, 300 sec: 43459.3). Total num frames: 1751629824. Throughput: 0: 11079.6. Samples: 187904736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:38,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:40,623][626795] Updated weights for policy 0, policy_version 213832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:42,284][626795] Updated weights for policy 0, policy_version 213842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:43,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45112.8, 300 sec: 43459.3). Total num frames: 1751867392. Throughput: 0: 11097.3. Samples: 187941078. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:43,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:44,035][626795] Updated weights for policy 0, policy_version 213852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:45,776][626795] Updated weights for policy 0, policy_version 213862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:47,360][626795] Updated weights for policy 0, policy_version 213872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:48,976][24592] Fps is (10 sec: 48330.2, 60 sec: 45328.7, 300 sec: 43459.2). Total num frames: 1752113152. Throughput: 0: 11099.3. Samples: 188013354. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:48,977][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:49,202][626795] Updated weights for policy 0, policy_version 213882 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:50,890][626795] Updated weights for policy 0, policy_version 213892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:52,533][626795] Updated weights for policy 0, policy_version 213902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:56,224][24592] Fps is (10 sec: 34778.0, 60 sec: 42770.4, 300 sec: 42965.2). Total num frames: 1752293376. Throughput: 0: 9811.1. Samples: 188049204. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:56,226][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:57,745][626795] Updated weights for policy 0, policy_version 213912 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:58,975][24592] Fps is (10 sec: 31131.5, 60 sec: 42461.9, 300 sec: 42931.6). Total num frames: 1752424448. Throughput: 0: 10176.9. Samples: 188079834. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:12:58,976][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:12:59,468][626795] Updated weights for policy 0, policy_version 213922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:01,130][626795] Updated weights for policy 0, policy_version 213932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:02,842][626795] Updated weights for policy 0, policy_version 213942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:03,975][24592] Fps is (10 sec: 47557.5, 60 sec: 42598.3, 300 sec: 43458.6). Total num frames: 1752662016. Throughput: 0: 11378.6. Samples: 188151546. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:03,977][24592] Avg episode reward: [(0, '4.952')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000213948_1752662016.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000212684_1742307328.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:04,631][626795] Updated weights for policy 0, policy_version 213952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:06,282][626795] Updated weights for policy 0, policy_version 213962 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:07,916][626795] Updated weights for policy 0, policy_version 213972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:08,975][24592] Fps is (10 sec: 48332.5, 60 sec: 42598.3, 300 sec: 43514.8). Total num frames: 1752907776. Throughput: 0: 11084.4. Samples: 188223696. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:08,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:09,646][626795] Updated weights for policy 0, policy_version 213982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:11,312][626795] Updated weights for policy 0, policy_version 213992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:12,997][626795] Updated weights for policy 0, policy_version 214002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:13,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42598.4, 300 sec: 43487.0). Total num frames: 1753145344. Throughput: 0: 11075.6. Samples: 188259420. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:13,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:14,780][626795] Updated weights for policy 0, policy_version 214012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:16,416][626795] Updated weights for policy 0, policy_version 214022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:18,081][626795] Updated weights for policy 0, policy_version 214032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:18,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45316.0, 300 sec: 43514.8). Total num frames: 1753391104. Throughput: 0: 11120.7. Samples: 188332794. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:18,977][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:19,744][626795] Updated weights for policy 0, policy_version 214042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:21,476][626795] Updated weights for policy 0, policy_version 214052 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:23,365][626795] Updated weights for policy 0, policy_version 214062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:23,975][24592] Fps is (10 sec: 47513.9, 60 sec: 45192.6, 300 sec: 43514.8). Total num frames: 1753620480. Throughput: 0: 11060.9. Samples: 188402478. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:23,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:25,327][626795] Updated weights for policy 0, policy_version 214072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:27,002][626795] Updated weights for policy 0, policy_version 214082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:31,845][24592] Fps is (10 sec: 33736.5, 60 sec: 42609.0, 300 sec: 42958.1). Total num frames: 1753825280. Throughput: 0: 10348.1. Samples: 188436438. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:31,846][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:32,099][626795] Updated weights for policy 0, policy_version 214092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:33,861][626795] Updated weights for policy 0, policy_version 214102 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:33,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42325.3, 300 sec: 42903.9). Total num frames: 1753931776. Throughput: 0: 10086.0. Samples: 188467218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:33,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:35,483][626795] Updated weights for policy 0, policy_version 214112 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:37,177][626795] Updated weights for policy 0, policy_version 214122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:38,849][626795] Updated weights for policy 0, policy_version 214132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:38,975][24592] Fps is (10 sec: 48253.3, 60 sec: 42325.4, 300 sec: 43408.2). Total num frames: 1754169344. Throughput: 0: 11483.6. Samples: 188540142. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:38,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:40,517][626795] Updated weights for policy 0, policy_version 214142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:42,214][626795] Updated weights for policy 0, policy_version 214152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:43,971][626795] Updated weights for policy 0, policy_version 214162 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42461.9, 300 sec: 43487.1). Total num frames: 1754415104. Throughput: 0: 11043.6. Samples: 188576796. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:43,976][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:45,625][626795] Updated weights for policy 0, policy_version 214172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:47,375][626795] Updated weights for policy 0, policy_version 214182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:48,948][626795] Updated weights for policy 0, policy_version 214192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:48,975][24592] Fps is (10 sec: 49151.5, 60 sec: 42462.2, 300 sec: 43542.6). Total num frames: 1754660864. Throughput: 0: 11074.0. Samples: 188649876. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:48,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:50,611][626795] Updated weights for policy 0, policy_version 214202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:52,359][626795] Updated weights for policy 0, policy_version 214212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:53,975][24592] Fps is (10 sec: 48332.8, 60 sec: 45108.1, 300 sec: 43514.8). Total num frames: 1754898432. Throughput: 0: 11090.9. Samples: 188722788. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:53,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:54,041][626795] Updated weights for policy 0, policy_version 214222 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:55,788][626795] Updated weights for policy 0, policy_version 214232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:57,416][626795] Updated weights for policy 0, policy_version 214242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:58,976][24592] Fps is (10 sec: 48330.9, 60 sec: 45328.7, 300 sec: 43542.5). Total num frames: 1755144192. Throughput: 0: 11094.7. Samples: 188758686. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:13:58,978][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:13:59,178][626795] Updated weights for policy 0, policy_version 214252 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:00,839][626795] Updated weights for policy 0, policy_version 214262 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:02,525][626795] Updated weights for policy 0, policy_version 214272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:03,976][24592] Fps is (10 sec: 48331.3, 60 sec: 45328.9, 300 sec: 43514.8). Total num frames: 1755381760. Throughput: 0: 11082.7. Samples: 188831520. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:03,978][24592] Avg episode reward: [(0, '5.002')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:07,530][626795] Updated weights for policy 0, policy_version 214282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:08,975][24592] Fps is (10 sec: 31950.6, 60 sec: 42598.5, 300 sec: 42987.2). Total num frames: 1755463680. Throughput: 0: 10232.2. Samples: 188862924. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:08,976][24592] Avg episode reward: [(0, '4.963')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:09,251][626795] Updated weights for policy 0, policy_version 214292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:11,005][626795] Updated weights for policy 0, policy_version 214302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:12,678][626795] Updated weights for policy 0, policy_version 214312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:13,978][24592] Fps is (10 sec: 31942.7, 60 sec: 42596.9, 300 sec: 42959.1). Total num frames: 1755701248. Throughput: 0: 10978.9. Samples: 188899008. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:13,979][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:14,361][626795] Updated weights for policy 0, policy_version 214322 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:16,069][626795] Updated weights for policy 0, policy_version 214332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:17,787][626795] Updated weights for policy 0, policy_version 214342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:18,975][24592] Fps is (10 sec: 48332.5, 60 sec: 42598.4, 300 sec: 43542.6). Total num frames: 1755947008. Throughput: 0: 11205.2. Samples: 188971452. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:18,976][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:19,442][626795] Updated weights for policy 0, policy_version 214352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:21,131][626795] Updated weights for policy 0, policy_version 214362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:22,862][626795] Updated weights for policy 0, policy_version 214372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:23,976][24592] Fps is (10 sec: 49161.0, 60 sec: 42871.2, 300 sec: 43570.3). Total num frames: 1756192768. Throughput: 0: 11209.8. Samples: 189044586. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:23,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:24,497][626795] Updated weights for policy 0, policy_version 214382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:26,258][626795] Updated weights for policy 0, policy_version 214392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:27,938][626795] Updated weights for policy 0, policy_version 214402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:28,976][24592] Fps is (10 sec: 48331.7, 60 sec: 45598.3, 300 sec: 43570.4). Total num frames: 1756430336. Throughput: 0: 11194.7. Samples: 189080562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:28,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:29,593][626795] Updated weights for policy 0, policy_version 214412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:31,338][626795] Updated weights for policy 0, policy_version 214422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:33,015][626795] Updated weights for policy 0, policy_version 214432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:33,975][24592] Fps is (10 sec: 48334.7, 60 sec: 45738.7, 300 sec: 43570.3). Total num frames: 1756676096. Throughput: 0: 11188.4. Samples: 189153354. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:33,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:34,691][626795] Updated weights for policy 0, policy_version 214442 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:36,390][626795] Updated weights for policy 0, policy_version 214452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:38,115][626795] Updated weights for policy 0, policy_version 214462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:38,975][24592] Fps is (10 sec: 48333.8, 60 sec: 45738.7, 300 sec: 43570.3). Total num frames: 1756913664. Throughput: 0: 11175.1. Samples: 189225666. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:38,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:43,204][626795] Updated weights for policy 0, policy_version 214472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:43,975][24592] Fps is (10 sec: 31129.5, 60 sec: 42871.4, 300 sec: 42987.2). Total num frames: 1756987392. Throughput: 0: 10682.0. Samples: 189239370. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:43,977][24592] Avg episode reward: [(0, '4.272')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:44,880][626795] Updated weights for policy 0, policy_version 214482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:46,624][626795] Updated weights for policy 0, policy_version 214492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:48,204][626795] Updated weights for policy 0, policy_version 214502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:48,975][24592] Fps is (10 sec: 31948.6, 60 sec: 42871.5, 300 sec: 43014.9). Total num frames: 1757233152. Throughput: 0: 10253.0. Samples: 189292902. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:48,976][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:49,980][626795] Updated weights for policy 0, policy_version 214512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:51,670][626795] Updated weights for policy 0, policy_version 214522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:53,339][626795] Updated weights for policy 0, policy_version 214532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:53,975][24592] Fps is (10 sec: 48333.1, 60 sec: 42871.5, 300 sec: 43570.3). Total num frames: 1757470720. Throughput: 0: 11176.4. Samples: 189365862. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:53,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:55,123][626795] Updated weights for policy 0, policy_version 214542 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:56,797][626795] Updated weights for policy 0, policy_version 214552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:14:58,442][626795] Updated weights for policy 0, policy_version 214562 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:58,975][24592] Fps is (10 sec: 47513.7, 60 sec: 42735.2, 300 sec: 43570.3). Total num frames: 1757708288. Throughput: 0: 11165.3. Samples: 189401424. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:14:58,979][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:00,188][626795] Updated weights for policy 0, policy_version 214572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:01,891][626795] Updated weights for policy 0, policy_version 214582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:03,549][626795] Updated weights for policy 0, policy_version 214592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:03,975][24592] Fps is (10 sec: 48332.4, 60 sec: 42871.7, 300 sec: 43598.1). Total num frames: 1757954048. Throughput: 0: 11176.5. Samples: 189474396. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:03,977][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000214594_1757954048.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:04,049][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000213326_1747566592.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:05,365][626795] Updated weights for policy 0, policy_version 214602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:06,898][626795] Updated weights for policy 0, policy_version 214612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:08,679][626795] Updated weights for policy 0, policy_version 214622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:08,975][24592] Fps is (10 sec: 49152.5, 60 sec: 45602.1, 300 sec: 43626.1). Total num frames: 1758199808. Throughput: 0: 11158.1. Samples: 189546696. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:08,976][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:10,318][626795] Updated weights for policy 0, policy_version 214632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:12,018][626795] Updated weights for policy 0, policy_version 214642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:13,668][626795] Updated weights for policy 0, policy_version 214652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:13,975][24592] Fps is (10 sec: 48333.1, 60 sec: 45603.8, 300 sec: 43598.1). Total num frames: 1758437376. Throughput: 0: 11161.8. Samples: 189582840. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:13,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:18,902][626795] Updated weights for policy 0, policy_version 214662 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:18,987][24592] Fps is (10 sec: 31092.7, 60 sec: 42726.5, 300 sec: 43041.0). Total num frames: 1758511104. Throughput: 0: 10534.6. Samples: 189627534. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:18,988][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:20,605][626795] Updated weights for policy 0, policy_version 214672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:22,179][626795] Updated weights for policy 0, policy_version 214682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:23,931][626795] Updated weights for policy 0, policy_version 214692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:23,975][24592] Fps is (10 sec: 31948.5, 60 sec: 42735.2, 300 sec: 43070.5). Total num frames: 1758756864. Throughput: 0: 10250.1. Samples: 189686922. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:23,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:25,571][626795] Updated weights for policy 0, policy_version 214702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:27,330][626795] Updated weights for policy 0, policy_version 214712 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:28,976][24592] Fps is (10 sec: 48388.3, 60 sec: 42734.8, 300 sec: 43598.1). Total num frames: 1758994432. Throughput: 0: 10754.5. Samples: 189723324. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:28,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:28,986][626795] Updated weights for policy 0, policy_version 214722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:30,682][626795] Updated weights for policy 0, policy_version 214732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:32,378][626795] Updated weights for policy 0, policy_version 214742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:33,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42734.9, 300 sec: 43625.9). Total num frames: 1759240192. Throughput: 0: 11166.0. Samples: 189795372. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:33,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:34,130][626795] Updated weights for policy 0, policy_version 214752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:35,730][626795] Updated weights for policy 0, policy_version 214762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:37,496][626795] Updated weights for policy 0, policy_version 214772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:38,975][24592] Fps is (10 sec: 49153.9, 60 sec: 42871.5, 300 sec: 43625.9). Total num frames: 1759485952. Throughput: 0: 11159.2. Samples: 189868026. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:38,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:39,199][626795] Updated weights for policy 0, policy_version 214782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:40,818][626795] Updated weights for policy 0, policy_version 214792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:42,563][626795] Updated weights for policy 0, policy_version 214802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:43,976][24592] Fps is (10 sec: 48331.7, 60 sec: 45602.0, 300 sec: 43625.8). Total num frames: 1759723520. Throughput: 0: 11161.3. Samples: 189903684. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:43,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:44,319][626795] Updated weights for policy 0, policy_version 214812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:45,940][626795] Updated weights for policy 0, policy_version 214822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:47,713][626795] Updated weights for policy 0, policy_version 214832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:48,975][24592] Fps is (10 sec: 47513.8, 60 sec: 45465.7, 300 sec: 43653.6). Total num frames: 1759961088. Throughput: 0: 11145.5. Samples: 189975942. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:48,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:49,550][626795] Updated weights for policy 0, policy_version 214842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:54,302][24592] Fps is (10 sec: 31731.3, 60 sec: 42774.9, 300 sec: 43106.0). Total num frames: 1760051200. Throughput: 0: 10249.0. Samples: 190011252. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:54,303][24592] Avg episode reward: [(0, '4.289')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:54,465][626795] Updated weights for policy 0, policy_version 214852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:56,281][626795] Updated weights for policy 0, policy_version 214862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:57,928][626795] Updated weights for policy 0, policy_version 214872 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:58,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42735.0, 300 sec: 43098.4). Total num frames: 1760272384. Throughput: 0: 10218.5. Samples: 190042674. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:15:58,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:15:59,731][626795] Updated weights for policy 0, policy_version 214882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:01,376][626795] Updated weights for policy 0, policy_version 214892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:03,014][626795] Updated weights for policy 0, policy_version 214902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:03,975][24592] Fps is (10 sec: 48272.4, 60 sec: 42734.9, 300 sec: 43678.2). Total num frames: 1760518144. Throughput: 0: 10834.6. Samples: 190114962. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:03,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:04,757][626795] Updated weights for policy 0, policy_version 214912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:06,485][626795] Updated weights for policy 0, policy_version 214922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:08,048][626795] Updated weights for policy 0, policy_version 214932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42598.4, 300 sec: 43683.5). Total num frames: 1760755712. Throughput: 0: 11121.6. Samples: 190187394. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:08,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:09,755][626795] Updated weights for policy 0, policy_version 214942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:11,494][626795] Updated weights for policy 0, policy_version 214952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:13,248][626795] Updated weights for policy 0, policy_version 214962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:13,975][24592] Fps is (10 sec: 48333.3, 60 sec: 42734.9, 300 sec: 43709.7). Total num frames: 1761001472. Throughput: 0: 11130.6. Samples: 190224198. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:13,977][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:14,829][626795] Updated weights for policy 0, policy_version 214972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:16,502][626795] Updated weights for policy 0, policy_version 214982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:18,178][626795] Updated weights for policy 0, policy_version 214992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:18,975][24592] Fps is (10 sec: 49152.0, 60 sec: 45611.2, 300 sec: 43709.2). Total num frames: 1761247232. Throughput: 0: 11155.3. Samples: 190297362. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:18,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:19,915][626795] Updated weights for policy 0, policy_version 215002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:21,540][626795] Updated weights for policy 0, policy_version 215012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:23,313][626795] Updated weights for policy 0, policy_version 215022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:23,975][24592] Fps is (10 sec: 49152.1, 60 sec: 45602.2, 300 sec: 43709.2). Total num frames: 1761492992. Throughput: 0: 11158.5. Samples: 190370160. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:23,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:24,994][626795] Updated weights for policy 0, policy_version 215032 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:30,028][24592] Fps is (10 sec: 33352.2, 60 sec: 42937.4, 300 sec: 43166.4). Total num frames: 1761615872. Throughput: 0: 10129.2. Samples: 190370160. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:30,030][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:30,161][626795] Updated weights for policy 0, policy_version 215042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:31,111][626772] Signal inference workers to stop experience collection... (2600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:31,117][626772] Signal inference workers to resume experience collection... (2600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:31,131][626795] InferenceWorker_p0-w0: stopping experience collection (2600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:31,135][626795] InferenceWorker_p0-w0: resuming experience collection (2600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:31,870][626795] Updated weights for policy 0, policy_version 215052 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:33,546][626795] Updated weights for policy 0, policy_version 215062 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:33,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42735.0, 300 sec: 43153.8). Total num frames: 1761804288. Throughput: 0: 10223.9. Samples: 190436016. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:33,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:35,337][626795] Updated weights for policy 0, policy_version 215072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:36,996][626795] Updated weights for policy 0, policy_version 215082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:38,603][626795] Updated weights for policy 0, policy_version 215092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:38,976][24592] Fps is (10 sec: 47610.4, 60 sec: 42598.2, 300 sec: 43660.3). Total num frames: 1762041856. Throughput: 0: 11157.2. Samples: 190509678. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:38,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:40,267][626795] Updated weights for policy 0, policy_version 215102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:42,082][626795] Updated weights for policy 0, policy_version 215112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:43,656][626795] Updated weights for policy 0, policy_version 215122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42735.2, 300 sec: 43709.2). Total num frames: 1762287616. Throughput: 0: 11166.9. Samples: 190545186. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:43,976][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:45,343][626795] Updated weights for policy 0, policy_version 215132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:47,104][626795] Updated weights for policy 0, policy_version 215142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:48,792][626795] Updated weights for policy 0, policy_version 215152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:48,975][24592] Fps is (10 sec: 48333.6, 60 sec: 42734.9, 300 sec: 43709.2). Total num frames: 1762525184. Throughput: 0: 11185.1. Samples: 190618290. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:48,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:50,363][626795] Updated weights for policy 0, policy_version 215162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:52,141][626795] Updated weights for policy 0, policy_version 215172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:53,884][626795] Updated weights for policy 0, policy_version 215182 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:53,975][24592] Fps is (10 sec: 48332.3, 60 sec: 45577.4, 300 sec: 43709.2). Total num frames: 1762770944. Throughput: 0: 11188.6. Samples: 190690884. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:53,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:55,492][626795] Updated weights for policy 0, policy_version 215192 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:57,274][626795] Updated weights for policy 0, policy_version 215202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:16:58,974][626795] Updated weights for policy 0, policy_version 215212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:58,975][24592] Fps is (10 sec: 49151.9, 60 sec: 45738.6, 300 sec: 43764.7). Total num frames: 1763016704. Throughput: 0: 11181.1. Samples: 190727346. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:16:58,976][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:00,599][626795] Updated weights for policy 0, policy_version 215222 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:05,695][24592] Fps is (10 sec: 34252.5, 60 sec: 43004.7, 300 sec: 43207.5). Total num frames: 1763172352. Throughput: 0: 9973.0. Samples: 190763292. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:05,696][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:05,699][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000215231_1763172352.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:05,749][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000213948_1752662016.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:05,823][626795] Updated weights for policy 0, policy_version 215232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:07,587][626795] Updated weights for policy 0, policy_version 215242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:08,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42871.4, 300 sec: 43181.6). Total num frames: 1763328000. Throughput: 0: 10211.1. Samples: 190829658. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:08,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:09,181][626795] Updated weights for policy 0, policy_version 215252 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:10,870][626795] Updated weights for policy 0, policy_version 215262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:12,643][626795] Updated weights for policy 0, policy_version 215272 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:13,975][24592] Fps is (10 sec: 48474.1, 60 sec: 42871.4, 300 sec: 43725.6). Total num frames: 1763573760. Throughput: 0: 11278.7. Samples: 190865826. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:13,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:14,229][626795] Updated weights for policy 0, policy_version 215282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:15,956][626795] Updated weights for policy 0, policy_version 215292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:17,731][626795] Updated weights for policy 0, policy_version 215302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:18,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42734.9, 300 sec: 43737.0). Total num frames: 1763811328. Throughput: 0: 11175.7. Samples: 190938924. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:18,978][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:19,350][626795] Updated weights for policy 0, policy_version 215312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:21,174][626795] Updated weights for policy 0, policy_version 215322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:22,811][626795] Updated weights for policy 0, policy_version 215332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 42734.9, 300 sec: 43764.8). Total num frames: 1764057088. Throughput: 0: 11148.9. Samples: 191011374. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:23,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:24,407][626795] Updated weights for policy 0, policy_version 215342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:26,154][626795] Updated weights for policy 0, policy_version 215352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:27,915][626795] Updated weights for policy 0, policy_version 215362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:28,976][24592] Fps is (10 sec: 48331.5, 60 sec: 45443.6, 300 sec: 43736.9). Total num frames: 1764294656. Throughput: 0: 11156.4. Samples: 191047230. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:28,977][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:29,532][626795] Updated weights for policy 0, policy_version 215372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:31,216][626795] Updated weights for policy 0, policy_version 215382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:32,976][626795] Updated weights for policy 0, policy_version 215392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:33,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45602.1, 300 sec: 43764.7). Total num frames: 1764540416. Throughput: 0: 11145.2. Samples: 191119824. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:33,977][24592] Avg episode reward: [(0, '4.991')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:34,625][626795] Updated weights for policy 0, policy_version 215402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:36,343][626795] Updated weights for policy 0, policy_version 215412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:41,355][24592] Fps is (10 sec: 35072.5, 60 sec: 43074.6, 300 sec: 43249.2). Total num frames: 1764728832. Throughput: 0: 9818.5. Samples: 191156082. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:41,357][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:41,440][626795] Updated weights for policy 0, policy_version 215422 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:43,175][626795] Updated weights for policy 0, policy_version 215432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:43,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42734.9, 300 sec: 43181.6). Total num frames: 1764851712. Throughput: 0: 10216.8. Samples: 191187102. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:43,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:44,977][626795] Updated weights for policy 0, policy_version 215442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:46,615][626795] Updated weights for policy 0, policy_version 215452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:48,377][626795] Updated weights for policy 0, policy_version 215462 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:48,975][24592] Fps is (10 sec: 47301.2, 60 sec: 42735.0, 300 sec: 43709.1). Total num frames: 1765089280. Throughput: 0: 11431.9. Samples: 191258076. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:48,977][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:50,155][626795] Updated weights for policy 0, policy_version 215472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:51,795][626795] Updated weights for policy 0, policy_version 215482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:53,477][626795] Updated weights for policy 0, policy_version 215492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:53,975][24592] Fps is (10 sec: 48332.0, 60 sec: 42734.9, 300 sec: 43764.7). Total num frames: 1765335040. Throughput: 0: 11123.4. Samples: 191330214. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:53,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:55,296][626795] Updated weights for policy 0, policy_version 215502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:56,783][626795] Updated weights for policy 0, policy_version 215512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:17:58,604][626795] Updated weights for policy 0, policy_version 215522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:58,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.5, 300 sec: 43764.7). Total num frames: 1765572608. Throughput: 0: 11125.6. Samples: 191366478. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:17:58,976][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:00,216][626795] Updated weights for policy 0, policy_version 215532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:02,010][626795] Updated weights for policy 0, policy_version 215542 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:03,649][626795] Updated weights for policy 0, policy_version 215552 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:03,975][24592] Fps is (10 sec: 47514.2, 60 sec: 45260.6, 300 sec: 43737.0). Total num frames: 1765810176. Throughput: 0: 11110.1. Samples: 191438880. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:03,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:05,426][626795] Updated weights for policy 0, policy_version 215562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:06,990][626795] Updated weights for policy 0, policy_version 215572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:08,698][626795] Updated weights for policy 0, policy_version 215582 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:08,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45465.6, 300 sec: 43764.7). Total num frames: 1766055936. Throughput: 0: 11106.9. Samples: 191511186. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:08,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:10,461][626795] Updated weights for policy 0, policy_version 215592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:12,213][626795] Updated weights for policy 0, policy_version 215602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:17,010][24592] Fps is (10 sec: 35822.4, 60 sec: 42886.7, 300 sec: 43236.6). Total num frames: 1766277120. Throughput: 0: 10401.6. Samples: 191546868. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:17,011][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:17,280][626795] Updated weights for policy 0, policy_version 215612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:18,976][24592] Fps is (10 sec: 31128.5, 60 sec: 42598.2, 300 sec: 43209.3). Total num frames: 1766367232. Throughput: 0: 10182.2. Samples: 191578026. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:18,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:19,090][626795] Updated weights for policy 0, policy_version 215622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:20,779][626795] Updated weights for policy 0, policy_version 215632 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:22,679][626795] Updated weights for policy 0, policy_version 215642 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:23,975][24592] Fps is (10 sec: 45870.4, 60 sec: 42325.3, 300 sec: 43717.9). Total num frames: 1766596608. Throughput: 0: 11528.7. Samples: 191647440. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:23,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:24,494][626795] Updated weights for policy 0, policy_version 215652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:26,212][626795] Updated weights for policy 0, policy_version 215662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:27,929][626795] Updated weights for policy 0, policy_version 215672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:28,975][24592] Fps is (10 sec: 46696.1, 60 sec: 42325.6, 300 sec: 43736.9). Total num frames: 1766834176. Throughput: 0: 10989.7. Samples: 191681640. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:28,976][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:29,702][626795] Updated weights for policy 0, policy_version 215682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:31,374][626795] Updated weights for policy 0, policy_version 215692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:33,040][626795] Updated weights for policy 0, policy_version 215702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:33,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42188.8, 300 sec: 43736.9). Total num frames: 1767071744. Throughput: 0: 11010.5. Samples: 191753550. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:33,976][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:34,779][626795] Updated weights for policy 0, policy_version 215712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:36,499][626795] Updated weights for policy 0, policy_version 215722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:38,114][626795] Updated weights for policy 0, policy_version 215732 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:38,975][24592] Fps is (10 sec: 47513.3, 60 sec: 44784.2, 300 sec: 43709.2). Total num frames: 1767309312. Throughput: 0: 10999.2. Samples: 191825178. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:38,976][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:39,823][626795] Updated weights for policy 0, policy_version 215742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:41,588][626795] Updated weights for policy 0, policy_version 215752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:43,345][626795] Updated weights for policy 0, policy_version 215762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 45056.0, 300 sec: 43709.2). Total num frames: 1767555072. Throughput: 0: 11002.8. Samples: 191861604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:43,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:44,974][626795] Updated weights for policy 0, policy_version 215772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:46,700][626795] Updated weights for policy 0, policy_version 215782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:48,368][626795] Updated weights for policy 0, policy_version 215792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:48,977][24592] Fps is (10 sec: 48327.2, 60 sec: 45055.1, 300 sec: 43709.0). Total num frames: 1767792640. Throughput: 0: 10994.8. Samples: 191933658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:48,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:53,414][626795] Updated weights for policy 0, policy_version 215802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:53,976][24592] Fps is (10 sec: 31947.9, 60 sec: 42325.2, 300 sec: 43153.8). Total num frames: 1767874560. Throughput: 0: 10123.7. Samples: 191966754. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:53,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:55,134][626795] Updated weights for policy 0, policy_version 215812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:56,849][626795] Updated weights for policy 0, policy_version 215822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:18:58,612][626795] Updated weights for policy 0, policy_version 215832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:58,975][24592] Fps is (10 sec: 31133.4, 60 sec: 42188.8, 300 sec: 43126.1). Total num frames: 1768103936. Throughput: 0: 10822.9. Samples: 192001050. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:18:58,977][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:00,413][626795] Updated weights for policy 0, policy_version 215842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:02,234][626795] Updated weights for policy 0, policy_version 215852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:03,959][626795] Updated weights for policy 0, policy_version 215862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:03,975][24592] Fps is (10 sec: 46695.8, 60 sec: 42188.8, 300 sec: 43653.6). Total num frames: 1768341504. Throughput: 0: 10946.8. Samples: 192070626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:03,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000215862_1768341504.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:04,046][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000214594_1757954048.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:05,622][626795] Updated weights for policy 0, policy_version 215872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:07,321][626795] Updated weights for policy 0, policy_version 215882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:08,975][24592] Fps is (10 sec: 47513.6, 60 sec: 42052.3, 300 sec: 43654.0). Total num frames: 1768579072. Throughput: 0: 11015.3. Samples: 192143130. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:08,976][24592] Avg episode reward: [(0, '5.003')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:09,101][626795] Updated weights for policy 0, policy_version 215892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:10,759][626795] Updated weights for policy 0, policy_version 215902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:12,502][626795] Updated weights for policy 0, policy_version 215912 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:13,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44580.3, 300 sec: 43625.9). Total num frames: 1768816640. Throughput: 0: 11035.1. Samples: 192178218. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:13,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:14,161][626795] Updated weights for policy 0, policy_version 215922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:15,865][626795] Updated weights for policy 0, policy_version 215932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:17,572][626795] Updated weights for policy 0, policy_version 215942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:18,976][24592] Fps is (10 sec: 48328.9, 60 sec: 44919.1, 300 sec: 43625.8). Total num frames: 1769062400. Throughput: 0: 11054.1. Samples: 192250992. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:18,978][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:19,344][626795] Updated weights for policy 0, policy_version 215952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:20,932][626795] Updated weights for policy 0, policy_version 215962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:22,733][626795] Updated weights for policy 0, policy_version 215972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:23,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45056.0, 300 sec: 43625.9). Total num frames: 1769299968. Throughput: 0: 11068.9. Samples: 192323280. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:23,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:24,281][626795] Updated weights for policy 0, policy_version 215982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:28,976][24592] Fps is (10 sec: 31950.9, 60 sec: 42461.8, 300 sec: 43070.5). Total num frames: 1769381888. Throughput: 0: 10576.6. Samples: 192337554. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:28,977][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:29,488][626795] Updated weights for policy 0, policy_version 215992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:31,276][626795] Updated weights for policy 0, policy_version 216002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:32,948][626795] Updated weights for policy 0, policy_version 216012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:33,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42325.3, 300 sec: 43042.7). Total num frames: 1769611264. Throughput: 0: 10130.7. Samples: 192389526. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:33,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:34,681][626795] Updated weights for policy 0, policy_version 216022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:36,502][626795] Updated weights for policy 0, policy_version 216032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:38,164][626795] Updated weights for policy 0, policy_version 216042 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:38,976][24592] Fps is (10 sec: 47513.4, 60 sec: 42461.8, 300 sec: 43625.8). Total num frames: 1769857024. Throughput: 0: 10979.4. Samples: 192460824. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:38,979][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:39,887][626795] Updated weights for policy 0, policy_version 216052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:41,490][626795] Updated weights for policy 0, policy_version 216062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:43,225][626795] Updated weights for policy 0, policy_version 216072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:43,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42325.4, 300 sec: 43598.1). Total num frames: 1770094592. Throughput: 0: 11014.8. Samples: 192496716. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:43,976][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:45,036][626795] Updated weights for policy 0, policy_version 216082 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:46,647][626795] Updated weights for policy 0, policy_version 216092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:48,323][626795] Updated weights for policy 0, policy_version 216102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:48,975][24592] Fps is (10 sec: 47514.6, 60 sec: 42326.2, 300 sec: 43598.1). Total num frames: 1770332160. Throughput: 0: 11075.6. Samples: 192569028. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:48,976][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:50,086][626795] Updated weights for policy 0, policy_version 216112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:51,719][626795] Updated weights for policy 0, policy_version 216122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:53,500][626795] Updated weights for policy 0, policy_version 216132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:53,975][24592] Fps is (10 sec: 47513.5, 60 sec: 44919.7, 300 sec: 43598.1). Total num frames: 1770569728. Throughput: 0: 11066.1. Samples: 192641106. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:53,976][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:55,179][626795] Updated weights for policy 0, policy_version 216142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:56,816][626795] Updated weights for policy 0, policy_version 216152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:19:58,566][626795] Updated weights for policy 0, policy_version 216162 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:58,976][24592] Fps is (10 sec: 48331.0, 60 sec: 45192.3, 300 sec: 43598.1). Total num frames: 1770815488. Throughput: 0: 11095.9. Samples: 192677538. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:19:58,976][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:00,231][626795] Updated weights for policy 0, policy_version 216172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:03,976][24592] Fps is (10 sec: 32766.8, 60 sec: 42598.2, 300 sec: 43042.7). Total num frames: 1770897408. Throughput: 0: 10490.2. Samples: 192723048. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:03,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:05,175][626795] Updated weights for policy 0, policy_version 216182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:06,943][626795] Updated weights for policy 0, policy_version 216192 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:08,686][626795] Updated weights for policy 0, policy_version 216202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:08,976][24592] Fps is (10 sec: 31948.6, 60 sec: 42598.1, 300 sec: 43042.6). Total num frames: 1771134976. Throughput: 0: 10199.2. Samples: 192782250. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:08,977][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:10,343][626795] Updated weights for policy 0, policy_version 216212 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:12,108][626795] Updated weights for policy 0, policy_version 216222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:13,829][626795] Updated weights for policy 0, policy_version 216232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:13,975][24592] Fps is (10 sec: 48334.3, 60 sec: 42734.9, 300 sec: 43627.6). Total num frames: 1771380736. Throughput: 0: 10673.0. Samples: 192817836. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:13,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:15,490][626795] Updated weights for policy 0, policy_version 216242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:17,228][626795] Updated weights for policy 0, policy_version 216252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:18,845][626795] Updated weights for policy 0, policy_version 216262 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:18,975][24592] Fps is (10 sec: 48334.9, 60 sec: 42599.0, 300 sec: 43598.1). Total num frames: 1771618304. Throughput: 0: 11123.9. Samples: 192890100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:18,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:20,562][626795] Updated weights for policy 0, policy_version 216272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:22,189][626795] Updated weights for policy 0, policy_version 216282 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:23,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42734.9, 300 sec: 43625.9). Total num frames: 1771864064. Throughput: 0: 11161.9. Samples: 192963108. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:23,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:23,976][626795] Updated weights for policy 0, policy_version 216292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:25,630][626795] Updated weights for policy 0, policy_version 216302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:27,353][626795] Updated weights for policy 0, policy_version 216312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:28,976][24592] Fps is (10 sec: 48331.2, 60 sec: 45328.9, 300 sec: 43598.1). Total num frames: 1772101632. Throughput: 0: 11160.4. Samples: 192998940. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:28,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:29,130][626795] Updated weights for policy 0, policy_version 216322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:30,682][626795] Updated weights for policy 0, policy_version 216332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:32,414][626795] Updated weights for policy 0, policy_version 216342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:33,975][24592] Fps is (10 sec: 47513.8, 60 sec: 45465.6, 300 sec: 43570.3). Total num frames: 1772339200. Throughput: 0: 11172.4. Samples: 193071786. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:33,976][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:34,195][626795] Updated weights for policy 0, policy_version 216352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:35,758][626795] Updated weights for policy 0, policy_version 216362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:39,230][24592] Fps is (10 sec: 32754.6, 60 sec: 42826.5, 300 sec: 43061.1). Total num frames: 1772437504. Throughput: 0: 10316.7. Samples: 193107984. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:39,232][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:41,118][626795] Updated weights for policy 0, policy_version 216372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:42,825][626795] Updated weights for policy 0, policy_version 216382 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:43,975][24592] Fps is (10 sec: 31129.6, 60 sec: 42598.4, 300 sec: 43014.9). Total num frames: 1772650496. Throughput: 0: 10213.7. Samples: 193137150. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:43,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:44,693][626795] Updated weights for policy 0, policy_version 216392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:46,609][626795] Updated weights for policy 0, policy_version 216402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:48,331][626795] Updated weights for policy 0, policy_version 216412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:48,976][24592] Fps is (10 sec: 44549.2, 60 sec: 42325.0, 300 sec: 43507.4). Total num frames: 1772871680. Throughput: 0: 10697.0. Samples: 193204416. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:48,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:50,229][626795] Updated weights for policy 0, policy_version 216422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:51,993][626795] Updated weights for policy 0, policy_version 216432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:53,733][626795] Updated weights for policy 0, policy_version 216442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:53,975][24592] Fps is (10 sec: 45056.1, 60 sec: 42188.8, 300 sec: 43487.0). Total num frames: 1773101056. Throughput: 0: 10894.8. Samples: 193272510. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:53,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:55,539][626795] Updated weights for policy 0, policy_version 216452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:57,248][626795] Updated weights for policy 0, policy_version 216462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:20:58,822][626795] Updated weights for policy 0, policy_version 216472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:58,975][24592] Fps is (10 sec: 47515.6, 60 sec: 42189.0, 300 sec: 43487.0). Total num frames: 1773346816. Throughput: 0: 10905.1. Samples: 193308564. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:20:58,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:00,534][626795] Updated weights for policy 0, policy_version 216482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:02,271][626795] Updated weights for policy 0, policy_version 216492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:03,931][626795] Updated weights for policy 0, policy_version 216502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:03,976][24592] Fps is (10 sec: 48330.6, 60 sec: 44782.9, 300 sec: 43487.0). Total num frames: 1773584384. Throughput: 0: 10921.4. Samples: 193381566. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:03,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000216502_1773584384.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000215231_1763172352.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:05,718][626795] Updated weights for policy 0, policy_version 216512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:07,271][626795] Updated weights for policy 0, policy_version 216522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:08,975][24592] Fps is (10 sec: 47514.5, 60 sec: 44783.3, 300 sec: 43459.3). Total num frames: 1773821952. Throughput: 0: 10899.6. Samples: 193453590. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:08,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:09,148][626795] Updated weights for policy 0, policy_version 216532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:10,720][626795] Updated weights for policy 0, policy_version 216542 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:14,174][24592] Fps is (10 sec: 35343.5, 60 sec: 42593.8, 300 sec: 43013.7). Total num frames: 1773944832. Throughput: 0: 10059.0. Samples: 193453590. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:14,175][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:15,074][626795] Updated weights for policy 0, policy_version 216552 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:16,799][626795] Updated weights for policy 0, policy_version 216562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:18,476][626795] Updated weights for policy 0, policy_version 216572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:18,975][24592] Fps is (10 sec: 35225.3, 60 sec: 42598.4, 300 sec: 42987.2). Total num frames: 1774174208. Throughput: 0: 10174.5. Samples: 193529640. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:18,976][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:20,220][626795] Updated weights for policy 0, policy_version 216582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:21,871][626795] Updated weights for policy 0, policy_version 216592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:23,643][626795] Updated weights for policy 0, policy_version 216602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:23,975][24592] Fps is (10 sec: 48477.7, 60 sec: 42598.4, 300 sec: 43559.2). Total num frames: 1774419968. Throughput: 0: 11043.1. Samples: 193602114. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:23,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:25,421][626795] Updated weights for policy 0, policy_version 216612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:26,974][626795] Updated weights for policy 0, policy_version 216622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:28,671][626795] Updated weights for policy 0, policy_version 216632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:28,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42598.6, 300 sec: 43570.3). Total num frames: 1774657536. Throughput: 0: 11132.3. Samples: 193638102. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:28,976][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:30,363][626795] Updated weights for policy 0, policy_version 216642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:32,087][626795] Updated weights for policy 0, policy_version 216652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:33,717][626795] Updated weights for policy 0, policy_version 216662 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:33,975][24592] Fps is (10 sec: 48332.2, 60 sec: 42734.9, 300 sec: 43598.1). Total num frames: 1774903296. Throughput: 0: 11263.0. Samples: 193711248. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:33,979][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:35,358][626795] Updated weights for policy 0, policy_version 216672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:37,094][626795] Updated weights for policy 0, policy_version 216682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:38,776][626795] Updated weights for policy 0, policy_version 216692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:38,975][24592] Fps is (10 sec: 49151.8, 60 sec: 45385.0, 300 sec: 43598.1). Total num frames: 1775149056. Throughput: 0: 11378.1. Samples: 193784526. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:38,977][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:40,552][626795] Updated weights for policy 0, policy_version 216702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:42,165][626795] Updated weights for policy 0, policy_version 216712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:43,800][626795] Updated weights for policy 0, policy_version 216722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:43,975][24592] Fps is (10 sec: 49152.5, 60 sec: 45738.7, 300 sec: 43625.9). Total num frames: 1775394816. Throughput: 0: 11370.2. Samples: 193820220. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:43,976][24592] Avg episode reward: [(0, '4.341')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:45,607][626795] Updated weights for policy 0, policy_version 216732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:49,974][24592] Fps is (10 sec: 32771.6, 60 sec: 43244.2, 300 sec: 43035.9). Total num frames: 1775509504. Throughput: 0: 10327.5. Samples: 193856616. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:49,976][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:50,924][626795] Updated weights for policy 0, policy_version 216742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:52,678][626795] Updated weights for policy 0, policy_version 216752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:53,976][24592] Fps is (10 sec: 28671.4, 60 sec: 43007.8, 300 sec: 42931.6). Total num frames: 1775681536. Throughput: 0: 10348.1. Samples: 193919256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:53,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:54,492][626795] Updated weights for policy 0, policy_version 216762 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:56,395][626795] Updated weights for policy 0, policy_version 216772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:21:58,246][626795] Updated weights for policy 0, policy_version 216782 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:58,975][24592] Fps is (10 sec: 44595.0, 60 sec: 42735.0, 300 sec: 43434.7). Total num frames: 1775910912. Throughput: 0: 11146.3. Samples: 193952958. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:21:58,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:00,060][626795] Updated weights for policy 0, policy_version 216792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:01,844][626795] Updated weights for policy 0, policy_version 216802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:03,427][626795] Updated weights for policy 0, policy_version 216812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:03,975][24592] Fps is (10 sec: 46695.1, 60 sec: 42735.2, 300 sec: 43459.3). Total num frames: 1776148480. Throughput: 0: 10933.9. Samples: 194021664. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:03,976][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:05,142][626795] Updated weights for policy 0, policy_version 216822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:06,854][626795] Updated weights for policy 0, policy_version 216832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:08,531][626795] Updated weights for policy 0, policy_version 216842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:08,975][24592] Fps is (10 sec: 47513.4, 60 sec: 42734.8, 300 sec: 43431.5). Total num frames: 1776386048. Throughput: 0: 10944.1. Samples: 194094600. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:08,977][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:10,183][626795] Updated weights for policy 0, policy_version 216852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:11,889][626795] Updated weights for policy 0, policy_version 216862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:13,628][626795] Updated weights for policy 0, policy_version 216872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:13,975][24592] Fps is (10 sec: 47514.1, 60 sec: 44794.9, 300 sec: 43431.5). Total num frames: 1776623616. Throughput: 0: 10950.5. Samples: 194130876. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:13,976][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:15,366][626795] Updated weights for policy 0, policy_version 216882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:16,927][626795] Updated weights for policy 0, policy_version 216892 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:18,758][626795] Updated weights for policy 0, policy_version 216902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:18,976][24592] Fps is (10 sec: 48331.5, 60 sec: 44919.2, 300 sec: 43431.4). Total num frames: 1776869376. Throughput: 0: 10929.1. Samples: 194203062. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:18,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:20,402][626795] Updated weights for policy 0, policy_version 216912 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:22,157][626795] Updated weights for policy 0, policy_version 216922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:24,871][24592] Fps is (10 sec: 36842.1, 60 sec: 42779.2, 300 sec: 43023.3). Total num frames: 1777025024. Throughput: 0: 9906.5. Samples: 194239188. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:24,871][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:26,469][626795] Updated weights for policy 0, policy_version 216932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:28,197][626795] Updated weights for policy 0, policy_version 216942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:28,975][24592] Fps is (10 sec: 35226.8, 60 sec: 42734.9, 300 sec: 42987.2). Total num frames: 1777221632. Throughput: 0: 10214.7. Samples: 194279880. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:28,977][24592] Avg episode reward: [(0, '4.907')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:29,916][626795] Updated weights for policy 0, policy_version 216952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:31,635][626795] Updated weights for policy 0, policy_version 216962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:33,217][626795] Updated weights for policy 0, policy_version 216972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:33,975][24592] Fps is (10 sec: 48587.0, 60 sec: 42735.0, 300 sec: 43532.7). Total num frames: 1777467392. Throughput: 0: 11254.8. Samples: 194351838. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:33,976][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:34,979][626795] Updated weights for policy 0, policy_version 216982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:36,643][626795] Updated weights for policy 0, policy_version 216992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:38,271][626795] Updated weights for policy 0, policy_version 217002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:38,975][24592] Fps is (10 sec: 49152.1, 60 sec: 42735.0, 300 sec: 43598.1). Total num frames: 1777713152. Throughput: 0: 11237.9. Samples: 194424960. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:38,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:39,907][626795] Updated weights for policy 0, policy_version 217012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:41,672][626795] Updated weights for policy 0, policy_version 217022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:43,356][626795] Updated weights for policy 0, policy_version 217032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:43,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.4, 300 sec: 43598.1). Total num frames: 1777950720. Throughput: 0: 11302.0. Samples: 194461548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:43,976][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:45,114][626795] Updated weights for policy 0, policy_version 217042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:46,707][626795] Updated weights for policy 0, policy_version 217052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:48,432][626795] Updated weights for policy 0, policy_version 217062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:48,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45541.1, 300 sec: 43598.1). Total num frames: 1778196480. Throughput: 0: 11387.0. Samples: 194534076. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:48,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:50,141][626795] Updated weights for policy 0, policy_version 217072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:51,819][626795] Updated weights for policy 0, policy_version 217082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:53,512][626795] Updated weights for policy 0, policy_version 217092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:53,976][24592] Fps is (10 sec: 48327.4, 60 sec: 45874.5, 300 sec: 43597.9). Total num frames: 1778434048. Throughput: 0: 11388.0. Samples: 194607072. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:22:53,978][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:55,287][626795] Updated weights for policy 0, policy_version 217102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:22:56,960][626795] Updated weights for policy 0, policy_version 217112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:00,639][24592] Fps is (10 sec: 33009.4, 60 sec: 43308.7, 300 sec: 43049.8). Total num frames: 1778581504. Throughput: 0: 10971.5. Samples: 194642850. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:00,641][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:02,285][626795] Updated weights for policy 0, policy_version 217122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:03,975][24592] Fps is (10 sec: 30313.5, 60 sec: 43144.5, 300 sec: 42987.2). Total num frames: 1778737152. Throughput: 0: 10390.7. Samples: 194670642. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:03,976][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000217131_1778737152.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:04,050][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000215862_1768341504.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:04,163][626795] Updated weights for policy 0, policy_version 217132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:05,935][626795] Updated weights for policy 0, policy_version 217142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:07,674][626795] Updated weights for policy 0, policy_version 217152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:08,976][24592] Fps is (10 sec: 46184.2, 60 sec: 43007.5, 300 sec: 43462.0). Total num frames: 1778966528. Throughput: 0: 11351.1. Samples: 194739834. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:08,979][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:09,430][626795] Updated weights for policy 0, policy_version 217162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:11,138][626795] Updated weights for policy 0, policy_version 217172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:12,787][626795] Updated weights for policy 0, policy_version 217182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:13,976][24592] Fps is (10 sec: 47513.0, 60 sec: 43144.4, 300 sec: 43542.6). Total num frames: 1779212288. Throughput: 0: 11011.5. Samples: 194775402. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:13,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:14,523][626795] Updated weights for policy 0, policy_version 217192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:16,124][626795] Updated weights for policy 0, policy_version 217202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:17,774][626795] Updated weights for policy 0, policy_version 217212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:18,975][24592] Fps is (10 sec: 49156.0, 60 sec: 43144.8, 300 sec: 43598.1). Total num frames: 1779458048. Throughput: 0: 11046.7. Samples: 194848938. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:18,976][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:19,544][626795] Updated weights for policy 0, policy_version 217222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:21,188][626795] Updated weights for policy 0, policy_version 217232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:22,848][626795] Updated weights for policy 0, policy_version 217242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:23,975][24592] Fps is (10 sec: 48333.4, 60 sec: 45184.0, 300 sec: 43598.1). Total num frames: 1779695616. Throughput: 0: 11039.4. Samples: 194921736. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:23,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:24,484][626795] Updated weights for policy 0, policy_version 217252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:26,260][626795] Updated weights for policy 0, policy_version 217262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:27,946][626795] Updated weights for policy 0, policy_version 217272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:28,975][24592] Fps is (10 sec: 48332.0, 60 sec: 45329.0, 300 sec: 43625.9). Total num frames: 1779941376. Throughput: 0: 11034.1. Samples: 194958084. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:28,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:29,573][626795] Updated weights for policy 0, policy_version 217282 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:31,324][626795] Updated weights for policy 0, policy_version 217292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:32,743][626772] Signal inference workers to stop experience collection... (2650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:32,751][626772] Signal inference workers to resume experience collection... (2650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:32,762][626795] InferenceWorker_p0-w0: stopping experience collection (2650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:32,767][626795] InferenceWorker_p0-w0: resuming experience collection (2650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:35,890][24592] Fps is (10 sec: 36441.8, 60 sec: 43001.4, 300 sec: 43179.1). Total num frames: 1780129792. Throughput: 0: 10593.9. Samples: 195031080. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:35,891][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:36,026][626795] Updated weights for policy 0, policy_version 217302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:37,746][626795] Updated weights for policy 0, policy_version 217312 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:38,975][24592] Fps is (10 sec: 32768.3, 60 sec: 42598.4, 300 sec: 43098.3). Total num frames: 1780269056. Throughput: 0: 10184.4. Samples: 195065358. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:38,978][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:39,601][626795] Updated weights for policy 0, policy_version 217322 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:41,333][626795] Updated weights for policy 0, policy_version 217332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:42,928][626795] Updated weights for policy 0, policy_version 217342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:43,975][24592] Fps is (10 sec: 46604.4, 60 sec: 42598.3, 300 sec: 43098.4). Total num frames: 1780506624. Throughput: 0: 10574.2. Samples: 195101094. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:43,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:44,714][626795] Updated weights for policy 0, policy_version 217352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:46,379][626795] Updated weights for policy 0, policy_version 217362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:48,091][626795] Updated weights for policy 0, policy_version 217372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:48,976][24592] Fps is (10 sec: 48331.9, 60 sec: 42598.3, 300 sec: 43653.7). Total num frames: 1780752384. Throughput: 0: 11171.3. Samples: 195173352. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:48,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:49,693][626795] Updated weights for policy 0, policy_version 217382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:51,432][626795] Updated weights for policy 0, policy_version 217392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:53,069][626795] Updated weights for policy 0, policy_version 217402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:53,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42599.1, 300 sec: 43681.4). Total num frames: 1780989952. Throughput: 0: 11255.9. Samples: 195246342. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:53,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:54,803][626795] Updated weights for policy 0, policy_version 217412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:56,513][626795] Updated weights for policy 0, policy_version 217422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:58,186][626795] Updated weights for policy 0, policy_version 217432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:58,975][24592] Fps is (10 sec: 48333.8, 60 sec: 45498.7, 300 sec: 43709.2). Total num frames: 1781235712. Throughput: 0: 11274.7. Samples: 195282762. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:23:58,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:23:59,873][626795] Updated weights for policy 0, policy_version 217442 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:01,512][626795] Updated weights for policy 0, policy_version 217452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:03,292][626795] Updated weights for policy 0, policy_version 217462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:03,975][24592] Fps is (10 sec: 49152.5, 60 sec: 45738.7, 300 sec: 43736.9). Total num frames: 1781481472. Throughput: 0: 11255.9. Samples: 195355452. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:03,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:04,938][626795] Updated weights for policy 0, policy_version 217472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:06,661][626795] Updated weights for policy 0, policy_version 217482 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:11,401][24592] Fps is (10 sec: 35600.7, 60 sec: 43436.9, 300 sec: 43242.5). Total num frames: 1781678080. Throughput: 0: 9895.6. Samples: 195391044. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:11,402][24592] Avg episode reward: [(0, '5.078')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:11,717][626795] Updated weights for policy 0, policy_version 217492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:13,608][626795] Updated weights for policy 0, policy_version 217502 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:13,976][24592] Fps is (10 sec: 31128.1, 60 sec: 43007.8, 300 sec: 43153.8). Total num frames: 1781792768. Throughput: 0: 10344.3. Samples: 195423582. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:13,978][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:15,381][626795] Updated weights for policy 0, policy_version 217512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:17,033][626795] Updated weights for policy 0, policy_version 217522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:18,685][626795] Updated weights for policy 0, policy_version 217532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:18,975][24592] Fps is (10 sec: 46507.4, 60 sec: 42871.4, 300 sec: 43153.8). Total num frames: 1782030336. Throughput: 0: 10726.4. Samples: 195493236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:18,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:20,457][626795] Updated weights for policy 0, policy_version 217542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:22,139][626795] Updated weights for policy 0, policy_version 217552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:23,693][626795] Updated weights for policy 0, policy_version 217562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:23,975][24592] Fps is (10 sec: 48334.9, 60 sec: 43008.0, 300 sec: 43709.2). Total num frames: 1782276096. Throughput: 0: 11144.0. Samples: 195566838. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:23,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:25,374][626795] Updated weights for policy 0, policy_version 217572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:27,160][626795] Updated weights for policy 0, policy_version 217582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:28,813][626795] Updated weights for policy 0, policy_version 217592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:28,982][24592] Fps is (10 sec: 49121.4, 60 sec: 43003.6, 300 sec: 43763.8). Total num frames: 1782521856. Throughput: 0: 11147.9. Samples: 195602820. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:28,983][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:30,414][626795] Updated weights for policy 0, policy_version 217602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:32,109][626795] Updated weights for policy 0, policy_version 217612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:33,830][626795] Updated weights for policy 0, policy_version 217622 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:33,975][24592] Fps is (10 sec: 49151.8, 60 sec: 45412.6, 300 sec: 43764.7). Total num frames: 1782767616. Throughput: 0: 11180.8. Samples: 195676488. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:33,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:35,423][626795] Updated weights for policy 0, policy_version 217632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:37,074][626795] Updated weights for policy 0, policy_version 217642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:38,766][626795] Updated weights for policy 0, policy_version 217652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:38,975][24592] Fps is (10 sec: 49182.5, 60 sec: 45738.7, 300 sec: 43792.5). Total num frames: 1783013376. Throughput: 0: 11196.0. Samples: 195750162. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:38,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:40,538][626795] Updated weights for policy 0, policy_version 217662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:42,186][626795] Updated weights for policy 0, policy_version 217672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:46,875][24592] Fps is (10 sec: 36198.4, 60 sec: 43369.7, 300 sec: 43311.2). Total num frames: 1783234560. Throughput: 0: 10513.7. Samples: 195786366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:46,877][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:47,203][626795] Updated weights for policy 0, policy_version 217682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:48,976][24592] Fps is (10 sec: 31129.2, 60 sec: 42871.5, 300 sec: 43237.1). Total num frames: 1783324672. Throughput: 0: 10288.5. Samples: 195818436. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:48,980][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:48,993][626795] Updated weights for policy 0, policy_version 217692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:50,663][626795] Updated weights for policy 0, policy_version 217702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:52,321][626795] Updated weights for policy 0, policy_version 217712 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:53,975][24592] Fps is (10 sec: 47303.3, 60 sec: 43008.0, 300 sec: 43237.1). Total num frames: 1783570432. Throughput: 0: 11737.9. Samples: 195890778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:53,976][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:54,044][626795] Updated weights for policy 0, policy_version 217722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:55,778][626795] Updated weights for policy 0, policy_version 217732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:57,347][626795] Updated weights for policy 0, policy_version 217742 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:58,975][24592] Fps is (10 sec: 49152.5, 60 sec: 43007.9, 300 sec: 43792.5). Total num frames: 1783816192. Throughput: 0: 11180.4. Samples: 195926694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:24:58,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:24:59,134][626795] Updated weights for policy 0, policy_version 217752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:00,889][626795] Updated weights for policy 0, policy_version 217762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:02,453][626795] Updated weights for policy 0, policy_version 217772 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:03,977][24592] Fps is (10 sec: 48326.7, 60 sec: 42870.5, 300 sec: 43792.4). Total num frames: 1784053760. Throughput: 0: 11253.0. Samples: 195999636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:03,988][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:04,023][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000217781_1784061952.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:04,097][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000216502_1773584384.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:04,200][626795] Updated weights for policy 0, policy_version 217782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:05,903][626795] Updated weights for policy 0, policy_version 217792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:07,598][626795] Updated weights for policy 0, policy_version 217802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:08,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45531.5, 300 sec: 43792.5). Total num frames: 1784299520. Throughput: 0: 11231.5. Samples: 196072254. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:08,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:09,263][626795] Updated weights for policy 0, policy_version 217812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:11,000][626795] Updated weights for policy 0, policy_version 217822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:12,592][626795] Updated weights for policy 0, policy_version 217832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:13,975][24592] Fps is (10 sec: 49158.4, 60 sec: 45875.6, 300 sec: 43820.3). Total num frames: 1784545280. Throughput: 0: 11236.8. Samples: 196108404. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:13,976][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:14,308][626795] Updated weights for policy 0, policy_version 217842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:16,055][626795] Updated weights for policy 0, policy_version 217852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:17,607][626795] Updated weights for policy 0, policy_version 217862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:18,975][24592] Fps is (10 sec: 48333.0, 60 sec: 45875.2, 300 sec: 43792.5). Total num frames: 1784782848. Throughput: 0: 11227.2. Samples: 196181712. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:18,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:22,860][626795] Updated weights for policy 0, policy_version 217872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:23,976][24592] Fps is (10 sec: 31129.0, 60 sec: 43007.9, 300 sec: 43237.1). Total num frames: 1784856576. Throughput: 0: 10256.2. Samples: 196211694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:23,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:24,731][626795] Updated weights for policy 0, policy_version 217882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:26,298][626795] Updated weights for policy 0, policy_version 217892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:28,027][626795] Updated weights for policy 0, policy_version 217902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:28,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42875.9, 300 sec: 43237.1). Total num frames: 1785094144. Throughput: 0: 10934.0. Samples: 196246692. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:28,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:29,780][626795] Updated weights for policy 0, policy_version 217912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:31,447][626795] Updated weights for policy 0, policy_version 217922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:33,081][626795] Updated weights for policy 0, policy_version 217932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:33,975][24592] Fps is (10 sec: 48333.8, 60 sec: 42871.5, 300 sec: 43774.7). Total num frames: 1785339904. Throughput: 0: 11135.4. Samples: 196319526. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:33,977][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:34,846][626795] Updated weights for policy 0, policy_version 217942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:36,452][626795] Updated weights for policy 0, policy_version 217952 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:38,113][626795] Updated weights for policy 0, policy_version 217962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:38,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42735.0, 300 sec: 43820.3). Total num frames: 1785577472. Throughput: 0: 11155.9. Samples: 196392792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:38,976][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:39,888][626795] Updated weights for policy 0, policy_version 217972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:41,663][626795] Updated weights for policy 0, policy_version 217982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:43,171][626795] Updated weights for policy 0, policy_version 217992 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:43,975][24592] Fps is (10 sec: 47513.7, 60 sec: 45192.1, 300 sec: 43875.9). Total num frames: 1785815040. Throughput: 0: 11157.9. Samples: 196428798. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:43,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:44,912][626795] Updated weights for policy 0, policy_version 218002 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:46,569][626795] Updated weights for policy 0, policy_version 218012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:48,396][626795] Updated weights for policy 0, policy_version 218022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:48,975][24592] Fps is (10 sec: 48332.9, 60 sec: 45602.3, 300 sec: 43931.3). Total num frames: 1786060800. Throughput: 0: 11161.5. Samples: 196501890. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:48,976][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:49,955][626795] Updated weights for policy 0, policy_version 218032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:51,682][626795] Updated weights for policy 0, policy_version 218042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:53,440][626795] Updated weights for policy 0, policy_version 218052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:53,975][24592] Fps is (10 sec: 48332.6, 60 sec: 45465.6, 300 sec: 43903.6). Total num frames: 1786298368. Throughput: 0: 11144.0. Samples: 196573734. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:53,976][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:25:58,528][626795] Updated weights for policy 0, policy_version 218062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:58,976][24592] Fps is (10 sec: 31947.8, 60 sec: 42734.8, 300 sec: 43376.0). Total num frames: 1786380288. Throughput: 0: 10612.1. Samples: 196585950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:25:58,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:00,412][626795] Updated weights for policy 0, policy_version 218072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:02,141][626795] Updated weights for policy 0, policy_version 218082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:03,788][626795] Updated weights for policy 0, policy_version 218092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:03,977][24592] Fps is (10 sec: 31943.6, 60 sec: 42734.7, 300 sec: 43375.7). Total num frames: 1786617856. Throughput: 0: 10160.6. Samples: 196638954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:03,979][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:05,463][626795] Updated weights for policy 0, policy_version 218102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:07,184][626795] Updated weights for policy 0, policy_version 218112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:08,836][626795] Updated weights for policy 0, policy_version 218122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:08,976][24592] Fps is (10 sec: 48332.5, 60 sec: 42734.7, 300 sec: 43822.0). Total num frames: 1786863616. Throughput: 0: 11126.8. Samples: 196712400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:08,978][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:10,453][626795] Updated weights for policy 0, policy_version 218132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:12,188][626795] Updated weights for policy 0, policy_version 218142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:13,845][626795] Updated weights for policy 0, policy_version 218152 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:13,975][24592] Fps is (10 sec: 48340.5, 60 sec: 42598.4, 300 sec: 43820.3). Total num frames: 1787101184. Throughput: 0: 11163.7. Samples: 196749060. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:13,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:15,476][626795] Updated weights for policy 0, policy_version 218162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:17,166][626795] Updated weights for policy 0, policy_version 218172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:18,903][626795] Updated weights for policy 0, policy_version 218182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:18,975][24592] Fps is (10 sec: 48334.6, 60 sec: 42734.9, 300 sec: 43820.3). Total num frames: 1787346944. Throughput: 0: 11167.3. Samples: 196822056. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:18,976][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:20,558][626795] Updated weights for policy 0, policy_version 218192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:22,249][626795] Updated weights for policy 0, policy_version 218202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:23,976][24592] Fps is (10 sec: 48330.8, 60 sec: 45465.4, 300 sec: 43820.2). Total num frames: 1787584512. Throughput: 0: 11160.0. Samples: 196894998. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:23,978][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:23,984][626795] Updated weights for policy 0, policy_version 218212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:25,635][626795] Updated weights for policy 0, policy_version 218222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:27,350][626795] Updated weights for policy 0, policy_version 218232 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:28,975][24592] Fps is (10 sec: 48332.6, 60 sec: 45602.1, 300 sec: 43820.3). Total num frames: 1787830272. Throughput: 0: 11168.8. Samples: 196931394. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:28,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:29,081][626795] Updated weights for policy 0, policy_version 218242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:33,975][24592] Fps is (10 sec: 32769.5, 60 sec: 42871.5, 300 sec: 43264.9). Total num frames: 1787912192. Throughput: 0: 10473.5. Samples: 196973196. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:33,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:34,232][626795] Updated weights for policy 0, policy_version 218252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:35,878][626795] Updated weights for policy 0, policy_version 218262 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:37,543][626795] Updated weights for policy 0, policy_version 218272 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:38,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42871.5, 300 sec: 43237.1). Total num frames: 1788149760. Throughput: 0: 10240.9. Samples: 197034576. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:38,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:39,317][626795] Updated weights for policy 0, policy_version 218282 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:40,985][626795] Updated weights for policy 0, policy_version 218292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:42,679][626795] Updated weights for policy 0, policy_version 218302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:43,975][24592] Fps is (10 sec: 47513.5, 60 sec: 42871.4, 300 sec: 43801.9). Total num frames: 1788387328. Throughput: 0: 10771.4. Samples: 197070660. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:43,976][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:44,402][626795] Updated weights for policy 0, policy_version 218312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:46,020][626795] Updated weights for policy 0, policy_version 218322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:47,798][626795] Updated weights for policy 0, policy_version 218332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:48,975][24592] Fps is (10 sec: 48332.6, 60 sec: 42871.4, 300 sec: 43903.6). Total num frames: 1788633088. Throughput: 0: 11211.2. Samples: 197143440. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:48,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:49,450][626795] Updated weights for policy 0, policy_version 218342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:51,083][626795] Updated weights for policy 0, policy_version 218352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:52,796][626795] Updated weights for policy 0, policy_version 218362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:53,976][24592] Fps is (10 sec: 48331.7, 60 sec: 42871.3, 300 sec: 43931.3). Total num frames: 1788870656. Throughput: 0: 11192.6. Samples: 197216064. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:53,976][24592] Avg episode reward: [(0, '5.037')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:54,561][626795] Updated weights for policy 0, policy_version 218372 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:56,165][626795] Updated weights for policy 0, policy_version 218382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:57,847][626795] Updated weights for policy 0, policy_version 218392 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:58,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45602.3, 300 sec: 43959.1). Total num frames: 1789116416. Throughput: 0: 11180.9. Samples: 197252202. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:26:58,977][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:26:59,617][626795] Updated weights for policy 0, policy_version 218402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:01,371][626795] Updated weights for policy 0, policy_version 218412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:02,957][626795] Updated weights for policy 0, policy_version 218422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:03,975][24592] Fps is (10 sec: 48333.9, 60 sec: 45603.4, 300 sec: 43959.1). Total num frames: 1789353984. Throughput: 0: 11177.9. Samples: 197325060. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:03,977][24592] Avg episode reward: [(0, '4.935')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000218427_1789353984.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:04,068][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000217131_1778737152.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:04,824][626795] Updated weights for policy 0, policy_version 218432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:08,979][24592] Fps is (10 sec: 31936.8, 60 sec: 42869.0, 300 sec: 43430.9). Total num frames: 1789435904. Throughput: 0: 10299.1. Samples: 197358492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:08,982][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:10,039][626795] Updated weights for policy 0, policy_version 218442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:11,708][626795] Updated weights for policy 0, policy_version 218452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:13,464][626795] Updated weights for policy 0, policy_version 218462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:13,975][24592] Fps is (10 sec: 30310.3, 60 sec: 42598.4, 300 sec: 43348.2). Total num frames: 1789657088. Throughput: 0: 10170.1. Samples: 197389050. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:13,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:15,240][626795] Updated weights for policy 0, policy_version 218472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:16,837][626795] Updated weights for policy 0, policy_version 218482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:18,506][626795] Updated weights for policy 0, policy_version 218492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:18,976][24592] Fps is (10 sec: 46710.7, 60 sec: 42598.1, 300 sec: 43786.5). Total num frames: 1789902848. Throughput: 0: 10849.4. Samples: 197461422. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:18,981][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:20,216][626795] Updated weights for policy 0, policy_version 218502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:21,845][626795] Updated weights for policy 0, policy_version 218512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:23,580][626795] Updated weights for policy 0, policy_version 218522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:23,975][24592] Fps is (10 sec: 49152.3, 60 sec: 42735.3, 300 sec: 43820.3). Total num frames: 1790148608. Throughput: 0: 11117.1. Samples: 197534844. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:23,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:25,319][626795] Updated weights for policy 0, policy_version 218532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:26,980][626795] Updated weights for policy 0, policy_version 218542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:28,647][626795] Updated weights for policy 0, policy_version 218552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:28,976][24592] Fps is (10 sec: 48333.0, 60 sec: 42598.2, 300 sec: 43792.4). Total num frames: 1790386176. Throughput: 0: 11107.8. Samples: 197570514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:28,978][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:30,314][626795] Updated weights for policy 0, policy_version 218562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:32,104][626795] Updated weights for policy 0, policy_version 218572 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:33,717][626795] Updated weights for policy 0, policy_version 218582 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:33,975][24592] Fps is (10 sec: 48332.5, 60 sec: 45329.0, 300 sec: 43792.5). Total num frames: 1790631936. Throughput: 0: 11111.9. Samples: 197643474. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:33,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:35,426][626795] Updated weights for policy 0, policy_version 218592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:37,088][626795] Updated weights for policy 0, policy_version 218602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:38,904][626795] Updated weights for policy 0, policy_version 218612 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:38,975][24592] Fps is (10 sec: 49153.4, 60 sec: 45465.6, 300 sec: 43820.3). Total num frames: 1790877696. Throughput: 0: 11114.5. Samples: 197716212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:38,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:40,493][626795] Updated weights for policy 0, policy_version 218622 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:44,410][24592] Fps is (10 sec: 32973.9, 60 sec: 42834.4, 300 sec: 43256.7). Total num frames: 1790976000. Throughput: 0: 10212.8. Samples: 197716212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:44,411][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:45,538][626795] Updated weights for policy 0, policy_version 218632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:47,292][626795] Updated weights for policy 0, policy_version 218642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:48,975][24592] Fps is (10 sec: 31129.7, 60 sec: 42598.4, 300 sec: 43237.3). Total num frames: 1791188992. Throughput: 0: 10194.0. Samples: 197783790. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:48,976][24592] Avg episode reward: [(0, '5.002')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:49,015][626795] Updated weights for policy 0, policy_version 218652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:50,728][626795] Updated weights for policy 0, policy_version 218662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:52,343][626795] Updated weights for policy 0, policy_version 218672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:53,977][24592] Fps is (10 sec: 47952.5, 60 sec: 42734.2, 300 sec: 43817.3). Total num frames: 1791434752. Throughput: 0: 11048.9. Samples: 197855664. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:53,978][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:54,157][626795] Updated weights for policy 0, policy_version 218682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:55,859][626795] Updated weights for policy 0, policy_version 218692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:57,525][626795] Updated weights for policy 0, policy_version 218702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:58,975][24592] Fps is (10 sec: 48332.8, 60 sec: 42598.5, 300 sec: 43848.0). Total num frames: 1791672320. Throughput: 0: 11178.4. Samples: 197892078. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:27:58,976][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:27:59,306][626795] Updated weights for policy 0, policy_version 218712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:00,994][626795] Updated weights for policy 0, policy_version 218722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:02,656][626795] Updated weights for policy 0, policy_version 218732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,583][626772] Signal inference workers to stop experience collection... (2700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,587][626772] Signal inference workers to resume experience collection... (2700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,596][626795] InferenceWorker_p0-w0: stopping experience collection (2700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,600][626795] InferenceWorker_p0-w0: resuming experience collection (2700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,975][24592] Fps is (10 sec: 47519.4, 60 sec: 42598.4, 300 sec: 43875.9). Total num frames: 1791909888. Throughput: 0: 11161.3. Samples: 197963676. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:03,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:04,345][626795] Updated weights for policy 0, policy_version 218742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:06,119][626795] Updated weights for policy 0, policy_version 218752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:07,842][626795] Updated weights for policy 0, policy_version 218762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:08,975][24592] Fps is (10 sec: 47513.5, 60 sec: 45195.4, 300 sec: 43848.1). Total num frames: 1792147456. Throughput: 0: 11133.3. Samples: 198035844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:08,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:09,468][626795] Updated weights for policy 0, policy_version 218772 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:11,337][626795] Updated weights for policy 0, policy_version 218782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:13,157][626795] Updated weights for policy 0, policy_version 218792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:13,975][24592] Fps is (10 sec: 46694.3, 60 sec: 45329.0, 300 sec: 43792.5). Total num frames: 1792376832. Throughput: 0: 11081.0. Samples: 198069156. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:13,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:14,842][626795] Updated weights for policy 0, policy_version 218802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:16,597][626795] Updated weights for policy 0, policy_version 218812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:20,069][24592] Fps is (10 sec: 32490.4, 60 sec: 42640.4, 300 sec: 43271.0). Total num frames: 1792507904. Throughput: 0: 10009.2. Samples: 198104838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:20,071][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:21,564][626795] Updated weights for policy 0, policy_version 218822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:23,328][626795] Updated weights for policy 0, policy_version 218832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:23,975][24592] Fps is (10 sec: 31949.0, 60 sec: 42461.8, 300 sec: 43237.1). Total num frames: 1792696320. Throughput: 0: 10135.6. Samples: 198172314. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:23,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:25,051][626795] Updated weights for policy 0, policy_version 218842 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:26,719][626795] Updated weights for policy 0, policy_version 218852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:28,370][626795] Updated weights for policy 0, policy_version 218862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:28,976][24592] Fps is (10 sec: 48748.2, 60 sec: 42598.3, 300 sec: 43715.1). Total num frames: 1792942080. Throughput: 0: 11052.1. Samples: 198208758. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:28,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:30,182][626795] Updated weights for policy 0, policy_version 218872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:31,852][626795] Updated weights for policy 0, policy_version 218882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:33,507][626795] Updated weights for policy 0, policy_version 218892 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:33,978][24592] Fps is (10 sec: 48321.8, 60 sec: 42460.3, 300 sec: 43764.4). Total num frames: 1793179648. Throughput: 0: 11049.0. Samples: 198281022. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:33,978][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:35,238][626795] Updated weights for policy 0, policy_version 218902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:36,772][626795] Updated weights for policy 0, policy_version 218912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:38,648][626795] Updated weights for policy 0, policy_version 218922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:38,975][24592] Fps is (10 sec: 47516.0, 60 sec: 42325.4, 300 sec: 43764.7). Total num frames: 1793417216. Throughput: 0: 11062.8. Samples: 198353478. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:38,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:40,317][626795] Updated weights for policy 0, policy_version 218932 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:42,044][626795] Updated weights for policy 0, policy_version 218942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:43,788][626795] Updated weights for policy 0, policy_version 218952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:43,975][24592] Fps is (10 sec: 48344.0, 60 sec: 45109.6, 300 sec: 43764.7). Total num frames: 1793662976. Throughput: 0: 11040.7. Samples: 198388908. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:43,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:45,489][626795] Updated weights for policy 0, policy_version 218962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:47,158][626795] Updated weights for policy 0, policy_version 218972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:48,804][626795] Updated weights for policy 0, policy_version 218982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:48,975][24592] Fps is (10 sec: 48332.7, 60 sec: 45192.5, 300 sec: 43764.7). Total num frames: 1793900544. Throughput: 0: 11055.2. Samples: 198461160. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:48,977][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:50,517][626795] Updated weights for policy 0, policy_version 218992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:52,228][626795] Updated weights for policy 0, policy_version 219002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:55,727][24592] Fps is (10 sec: 34158.8, 60 sec: 42585.2, 300 sec: 43230.4). Total num frames: 1794064384. Throughput: 0: 9874.9. Samples: 198497508. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:55,727][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:57,392][626795] Updated weights for policy 0, policy_version 219012 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:58,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42461.8, 300 sec: 43181.6). Total num frames: 1794220032. Throughput: 0: 10204.4. Samples: 198528354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:28:58,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:28:59,109][626795] Updated weights for policy 0, policy_version 219022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:00,866][626795] Updated weights for policy 0, policy_version 219032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:02,457][626795] Updated weights for policy 0, policy_version 219042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:03,975][24592] Fps is (10 sec: 47669.2, 60 sec: 42461.8, 300 sec: 43679.6). Total num frames: 1794457600. Throughput: 0: 11302.1. Samples: 198601068. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:03,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000219050_1794457600.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000217781_1784061952.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:04,255][626795] Updated weights for policy 0, policy_version 219052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:05,929][626795] Updated weights for policy 0, policy_version 219062 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:07,629][626795] Updated weights for policy 0, policy_version 219072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:08,975][24592] Fps is (10 sec: 48332.7, 60 sec: 42598.4, 300 sec: 43764.8). Total num frames: 1794703360. Throughput: 0: 11118.1. Samples: 198672630. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:08,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:09,329][626795] Updated weights for policy 0, policy_version 219082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:11,156][626795] Updated weights for policy 0, policy_version 219092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:12,660][626795] Updated weights for policy 0, policy_version 219102 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:13,975][24592] Fps is (10 sec: 48333.3, 60 sec: 42735.0, 300 sec: 43764.7). Total num frames: 1794940928. Throughput: 0: 11093.6. Samples: 198707964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:13,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:14,386][626795] Updated weights for policy 0, policy_version 219112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:16,174][626795] Updated weights for policy 0, policy_version 219122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:17,842][626795] Updated weights for policy 0, policy_version 219132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:18,975][24592] Fps is (10 sec: 47513.4, 60 sec: 45336.4, 300 sec: 43736.9). Total num frames: 1795178496. Throughput: 0: 11099.7. Samples: 198780486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:18,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:19,548][626795] Updated weights for policy 0, policy_version 219142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:21,182][626795] Updated weights for policy 0, policy_version 219152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:22,995][626795] Updated weights for policy 0, policy_version 219162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:23,975][24592] Fps is (10 sec: 48332.6, 60 sec: 45465.6, 300 sec: 43737.9). Total num frames: 1795424256. Throughput: 0: 11101.7. Samples: 198853056. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:23,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:24,631][626795] Updated weights for policy 0, policy_version 219172 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:26,314][626795] Updated weights for policy 0, policy_version 219182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:31,158][24592] Fps is (10 sec: 35639.0, 60 sec: 42947.8, 300 sec: 43222.8). Total num frames: 1795612672. Throughput: 0: 10587.5. Samples: 198888456. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:31,159][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:31,262][626795] Updated weights for policy 0, policy_version 219192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:33,013][626795] Updated weights for policy 0, policy_version 219202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:33,975][24592] Fps is (10 sec: 31948.8, 60 sec: 42736.6, 300 sec: 43153.8). Total num frames: 1795743744. Throughput: 0: 10240.0. Samples: 198921960. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:33,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:34,692][626795] Updated weights for policy 0, policy_version 219212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:36,417][626795] Updated weights for policy 0, policy_version 219222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:38,218][626795] Updated weights for policy 0, policy_version 219232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:38,975][24592] Fps is (10 sec: 48204.9, 60 sec: 42871.5, 300 sec: 43666.3). Total num frames: 1795989504. Throughput: 0: 11475.9. Samples: 198993828. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:38,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:39,828][626795] Updated weights for policy 0, policy_version 219242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:41,573][626795] Updated weights for policy 0, policy_version 219252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:43,243][626795] Updated weights for policy 0, policy_version 219262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:43,975][24592] Fps is (10 sec: 48332.9, 60 sec: 42734.9, 300 sec: 43737.0). Total num frames: 1796227072. Throughput: 0: 11154.1. Samples: 199030290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:43,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:45,024][626795] Updated weights for policy 0, policy_version 219272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:46,850][626795] Updated weights for policy 0, policy_version 219282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:48,647][626795] Updated weights for policy 0, policy_version 219292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:48,975][24592] Fps is (10 sec: 45875.0, 60 sec: 42461.8, 300 sec: 43653.6). Total num frames: 1796448256. Throughput: 0: 11063.6. Samples: 199098930. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:48,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:50,472][626795] Updated weights for policy 0, policy_version 219302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:52,417][626795] Updated weights for policy 0, policy_version 219312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:53,976][24592] Fps is (10 sec: 44235.7, 60 sec: 44722.7, 300 sec: 43570.3). Total num frames: 1796669440. Throughput: 0: 10961.5. Samples: 199165902. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:53,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:54,273][626795] Updated weights for policy 0, policy_version 219322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:56,132][626795] Updated weights for policy 0, policy_version 219332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:57,937][626795] Updated weights for policy 0, policy_version 219342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:58,975][24592] Fps is (10 sec: 43417.7, 60 sec: 44373.3, 300 sec: 43487.2). Total num frames: 1796882432. Throughput: 0: 10900.1. Samples: 199198470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:29:58,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:29:59,843][626795] Updated weights for policy 0, policy_version 219352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:01,667][626795] Updated weights for policy 0, policy_version 219362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:05,228][24592] Fps is (10 sec: 37129.8, 60 sec: 42931.1, 300 sec: 43164.9). Total num frames: 1797087232. Throughput: 0: 10473.3. Samples: 199264902. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:05,230][24592] Avg episode reward: [(0, '4.366')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:05,415][626795] Updated weights for policy 0, policy_version 219372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:07,212][626795] Updated weights for policy 0, policy_version 219382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:08,975][24592] Fps is (10 sec: 36864.1, 60 sec: 42461.9, 300 sec: 43070.5). Total num frames: 1797251072. Throughput: 0: 10166.0. Samples: 199310526. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:08,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:09,111][626795] Updated weights for policy 0, policy_version 219392 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:11,045][626795] Updated weights for policy 0, policy_version 219402 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:12,865][626795] Updated weights for policy 0, policy_version 219412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:13,975][24592] Fps is (10 sec: 44015.0, 60 sec: 42188.8, 300 sec: 43014.9). Total num frames: 1797472256. Throughput: 0: 10614.0. Samples: 199342920. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:13,977][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:14,676][626795] Updated weights for policy 0, policy_version 219422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:16,660][626795] Updated weights for policy 0, policy_version 219432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:18,491][626795] Updated weights for policy 0, policy_version 219442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:18,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41779.2, 300 sec: 43487.0). Total num frames: 1797685248. Throughput: 0: 10807.2. Samples: 199408284. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:18,976][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:20,268][626795] Updated weights for policy 0, policy_version 219452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:21,982][626795] Updated weights for policy 0, policy_version 219462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:23,750][626795] Updated weights for policy 0, policy_version 219472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:23,975][24592] Fps is (10 sec: 45055.7, 60 sec: 41642.6, 300 sec: 43487.0). Total num frames: 1797922816. Throughput: 0: 10770.8. Samples: 199478514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:23,985][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:25,454][626795] Updated weights for policy 0, policy_version 219482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:27,326][626795] Updated weights for policy 0, policy_version 219492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:28,975][24592] Fps is (10 sec: 46694.6, 60 sec: 43923.2, 300 sec: 43431.5). Total num frames: 1798152192. Throughput: 0: 10734.8. Samples: 199513356. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:28,977][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:29,115][626795] Updated weights for policy 0, policy_version 219502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:30,858][626795] Updated weights for policy 0, policy_version 219512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:32,698][626795] Updated weights for policy 0, policy_version 219522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:33,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43963.7, 300 sec: 43403.7). Total num frames: 1798381568. Throughput: 0: 10721.7. Samples: 199581408. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:33,976][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:34,455][626795] Updated weights for policy 0, policy_version 219532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:36,222][626795] Updated weights for policy 0, policy_version 219542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:39,123][24592] Fps is (10 sec: 38748.3, 60 sec: 42493.6, 300 sec: 43132.2). Total num frames: 1798545408. Throughput: 0: 9968.2. Samples: 199615944. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:39,124][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:39,618][626795] Updated weights for policy 0, policy_version 219552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:41,496][626795] Updated weights for policy 0, policy_version 219562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:43,178][626795] Updated weights for policy 0, policy_version 219572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:43,976][24592] Fps is (10 sec: 38500.8, 60 sec: 42325.0, 300 sec: 43070.4). Total num frames: 1798766592. Throughput: 0: 10393.2. Samples: 199666170. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:43,978][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:44,963][626795] Updated weights for policy 0, policy_version 219582 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:46,610][626795] Updated weights for policy 0, policy_version 219592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:48,271][626795] Updated weights for policy 0, policy_version 219602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:48,981][24592] Fps is (10 sec: 47369.6, 60 sec: 42731.1, 300 sec: 43097.5). Total num frames: 1799012352. Throughput: 0: 10797.1. Samples: 199737306. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:48,982][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:50,075][626795] Updated weights for policy 0, policy_version 219612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:51,796][626795] Updated weights for policy 0, policy_version 219622 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:53,692][626795] Updated weights for policy 0, policy_version 219632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:53,975][24592] Fps is (10 sec: 46696.0, 60 sec: 42735.1, 300 sec: 43570.4). Total num frames: 1799233536. Throughput: 0: 11024.4. Samples: 199806624. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:53,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:55,540][626795] Updated weights for policy 0, policy_version 219642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:57,375][626795] Updated weights for policy 0, policy_version 219652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:58,975][24592] Fps is (10 sec: 45079.9, 60 sec: 43007.9, 300 sec: 43542.8). Total num frames: 1799462912. Throughput: 0: 11051.7. Samples: 199840248. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:30:58,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:30:59,147][626795] Updated weights for policy 0, policy_version 219662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:00,935][626795] Updated weights for policy 0, policy_version 219672 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:02,852][626795] Updated weights for policy 0, policy_version 219682 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:03,975][24592] Fps is (10 sec: 45056.2, 60 sec: 44203.7, 300 sec: 43459.3). Total num frames: 1799684096. Throughput: 0: 11107.1. Samples: 199908102. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:03,984][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000219688_1799684096.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:04,075][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000218427_1789353984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:04,680][626795] Updated weights for policy 0, policy_version 219692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:06,495][626795] Updated weights for policy 0, policy_version 219702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:08,324][626795] Updated weights for policy 0, policy_version 219712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:08,975][24592] Fps is (10 sec: 44237.3, 60 sec: 44236.8, 300 sec: 43403.7). Total num frames: 1799905280. Throughput: 0: 11025.1. Samples: 199974642. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:08,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:10,115][626795] Updated weights for policy 0, policy_version 219722 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:13,760][626795] Updated weights for policy 0, policy_version 219732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:13,975][24592] Fps is (10 sec: 36864.0, 60 sec: 43008.0, 300 sec: 43070.5). Total num frames: 1800052736. Throughput: 0: 10924.0. Samples: 200004936. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:13,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:15,511][626795] Updated weights for policy 0, policy_version 219742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:17,429][626795] Updated weights for policy 0, policy_version 219752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:18,975][24592] Fps is (10 sec: 36863.9, 60 sec: 43144.5, 300 sec: 43015.0). Total num frames: 1800273920. Throughput: 0: 10538.4. Samples: 200055636. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:18,978][24592] Avg episode reward: [(0, '4.901')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:19,160][626795] Updated weights for policy 0, policy_version 219762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:21,059][626795] Updated weights for policy 0, policy_version 219772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:22,905][626795] Updated weights for policy 0, policy_version 219782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:23,975][24592] Fps is (10 sec: 44236.5, 60 sec: 42871.5, 300 sec: 42931.6). Total num frames: 1800495104. Throughput: 0: 11296.3. Samples: 200122608. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:23,976][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:24,775][626795] Updated weights for policy 0, policy_version 219792 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:26,556][626795] Updated weights for policy 0, policy_version 219802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:28,197][626795] Updated weights for policy 0, policy_version 219812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:28,976][24592] Fps is (10 sec: 45055.2, 60 sec: 42871.3, 300 sec: 43431.5). Total num frames: 1800724480. Throughput: 0: 10899.5. Samples: 200156646. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:28,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:30,193][626795] Updated weights for policy 0, policy_version 219822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:31,890][626795] Updated weights for policy 0, policy_version 219832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:33,598][626795] Updated weights for policy 0, policy_version 219842 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:33,976][24592] Fps is (10 sec: 45875.0, 60 sec: 42871.4, 300 sec: 43403.7). Total num frames: 1800953856. Throughput: 0: 10843.4. Samples: 200225202. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:33,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:35,382][626795] Updated weights for policy 0, policy_version 219852 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:37,216][626795] Updated weights for policy 0, policy_version 219862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:38,975][24592] Fps is (10 sec: 45876.2, 60 sec: 44072.4, 300 sec: 43375.9). Total num frames: 1801183232. Throughput: 0: 10853.1. Samples: 200295012. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:38,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:38,999][626795] Updated weights for policy 0, policy_version 219872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:40,711][626795] Updated weights for policy 0, policy_version 219882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:42,598][626795] Updated weights for policy 0, policy_version 219892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44100.5, 300 sec: 43320.4). Total num frames: 1801412608. Throughput: 0: 10870.9. Samples: 200329440. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:43,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:44,358][626795] Updated weights for policy 0, policy_version 219902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:47,463][626795] Updated weights for policy 0, policy_version 219912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:48,976][24592] Fps is (10 sec: 40139.2, 60 sec: 42875.1, 300 sec: 43098.2). Total num frames: 1801584640. Throughput: 0: 10550.1. Samples: 200382858. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:48,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:49,293][626795] Updated weights for policy 0, policy_version 219922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:51,129][626795] Updated weights for policy 0, policy_version 219932 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:52,893][626795] Updated weights for policy 0, policy_version 219942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:53,975][24592] Fps is (10 sec: 40141.0, 60 sec: 43008.0, 300 sec: 43042.7). Total num frames: 1801814016. Throughput: 0: 10572.6. Samples: 200450412. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:53,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:54,706][626795] Updated weights for policy 0, policy_version 219952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:56,590][626795] Updated weights for policy 0, policy_version 219962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:31:58,265][626795] Updated weights for policy 0, policy_version 219972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:58,975][24592] Fps is (10 sec: 45057.6, 60 sec: 42871.5, 300 sec: 42987.2). Total num frames: 1802035200. Throughput: 0: 10638.8. Samples: 200483682. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:31:58,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:00,189][626795] Updated weights for policy 0, policy_version 219982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:01,901][626795] Updated weights for policy 0, policy_version 219992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:03,695][626795] Updated weights for policy 0, policy_version 220002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:03,975][24592] Fps is (10 sec: 45056.3, 60 sec: 43008.0, 300 sec: 43487.6). Total num frames: 1802264576. Throughput: 0: 11041.6. Samples: 200552508. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:03,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:05,391][626795] Updated weights for policy 0, policy_version 220012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:07,204][626795] Updated weights for policy 0, policy_version 220022 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:08,975][24592] Fps is (10 sec: 45875.1, 60 sec: 43144.5, 300 sec: 43514.8). Total num frames: 1802493952. Throughput: 0: 11099.2. Samples: 200622072. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:08,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:09,119][626795] Updated weights for policy 0, policy_version 220032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:10,843][626795] Updated weights for policy 0, policy_version 220042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:12,780][626795] Updated weights for policy 0, policy_version 220052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:13,975][24592] Fps is (10 sec: 45875.5, 60 sec: 44509.9, 300 sec: 43459.3). Total num frames: 1802723328. Throughput: 0: 11073.9. Samples: 200654970. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:13,977][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:14,493][626795] Updated weights for policy 0, policy_version 220062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:16,351][626795] Updated weights for policy 0, policy_version 220072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:18,083][626795] Updated weights for policy 0, policy_version 220082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:20,078][24592] Fps is (10 sec: 40581.7, 60 sec: 43706.7, 300 sec: 43214.4). Total num frames: 1802944512. Throughput: 0: 10816.1. Samples: 200723850. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:20,079][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:21,271][626795] Updated weights for policy 0, policy_version 220092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:22,944][626795] Updated weights for policy 0, policy_version 220102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:23,975][24592] Fps is (10 sec: 39321.3, 60 sec: 43690.7, 300 sec: 43153.8). Total num frames: 1803116544. Throughput: 0: 10738.7. Samples: 200778252. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:23,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:24,667][626795] Updated weights for policy 0, policy_version 220112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:26,437][626795] Updated weights for policy 0, policy_version 220122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:28,288][626795] Updated weights for policy 0, policy_version 220132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:28,975][24592] Fps is (10 sec: 45114.9, 60 sec: 43690.8, 300 sec: 43098.3). Total num frames: 1803345920. Throughput: 0: 10729.1. Samples: 200812248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:28,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:30,029][626795] Updated weights for policy 0, policy_version 220142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:31,840][626795] Updated weights for policy 0, policy_version 220152 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:33,638][626795] Updated weights for policy 0, policy_version 220162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:33,975][24592] Fps is (10 sec: 45874.7, 60 sec: 43690.7, 300 sec: 43042.7). Total num frames: 1803575296. Throughput: 0: 11078.9. Samples: 200881404. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:33,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:35,462][626795] Updated weights for policy 0, policy_version 220172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:37,292][626795] Updated weights for policy 0, policy_version 220182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:38,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43690.6, 300 sec: 43551.2). Total num frames: 1803804672. Throughput: 0: 11085.2. Samples: 200949246. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:38,976][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:39,084][626795] Updated weights for policy 0, policy_version 220192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:40,819][626795] Updated weights for policy 0, policy_version 220202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:42,668][626795] Updated weights for policy 0, policy_version 220212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43690.7, 300 sec: 43542.5). Total num frames: 1804034048. Throughput: 0: 11115.2. Samples: 200983866. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:43,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:44,455][626795] Updated weights for policy 0, policy_version 220222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:46,267][626795] Updated weights for policy 0, policy_version 220232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:48,186][626795] Updated weights for policy 0, policy_version 220242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:48,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44646.7, 300 sec: 43487.2). Total num frames: 1804263424. Throughput: 0: 11073.2. Samples: 201050802. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:48,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:49,859][626795] Updated weights for policy 0, policy_version 220252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:51,707][626795] Updated weights for policy 0, policy_version 220262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:53,975][24592] Fps is (10 sec: 39322.1, 60 sec: 43554.2, 300 sec: 43237.1). Total num frames: 1804427264. Throughput: 0: 10737.9. Samples: 201105276. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:53,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:54,836][626795] Updated weights for policy 0, policy_version 220272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:56,758][626795] Updated weights for policy 0, policy_version 220282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:32:58,476][626795] Updated weights for policy 0, policy_version 220292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:58,976][24592] Fps is (10 sec: 37682.5, 60 sec: 43417.5, 300 sec: 43153.8). Total num frames: 1804640256. Throughput: 0: 10707.8. Samples: 201136824. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:32:58,977][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:00,391][626795] Updated weights for policy 0, policy_version 220302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:02,277][626795] Updated weights for policy 0, policy_version 220312 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:03,976][24592] Fps is (10 sec: 44235.5, 60 sec: 43417.4, 300 sec: 43126.0). Total num frames: 1804869632. Throughput: 0: 10937.8. Samples: 201203994. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:03,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000220321_1804869632.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:04,048][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000219050_1794457600.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:04,116][626795] Updated weights for policy 0, policy_version 220322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:05,916][626795] Updated weights for policy 0, policy_version 220332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:07,736][626795] Updated weights for policy 0, policy_version 220342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:08,976][24592] Fps is (10 sec: 45056.2, 60 sec: 43281.0, 300 sec: 43098.2). Total num frames: 1805090816. Throughput: 0: 10943.2. Samples: 201270696. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:08,976][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:09,663][626795] Updated weights for policy 0, policy_version 220352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:11,443][626795] Updated weights for policy 0, policy_version 220362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:13,072][626795] Updated weights for policy 0, policy_version 220372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:13,976][24592] Fps is (10 sec: 45055.9, 60 sec: 43280.8, 300 sec: 43593.1). Total num frames: 1805320192. Throughput: 0: 10939.7. Samples: 201304536. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:13,978][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:14,826][626795] Updated weights for policy 0, policy_version 220382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:16,731][626795] Updated weights for policy 0, policy_version 220392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:18,513][626795] Updated weights for policy 0, policy_version 220402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:18,975][24592] Fps is (10 sec: 45875.8, 60 sec: 44230.4, 300 sec: 43570.3). Total num frames: 1805549568. Throughput: 0: 10948.2. Samples: 201374070. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:18,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:20,377][626795] Updated weights for policy 0, policy_version 220412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:22,078][626795] Updated weights for policy 0, policy_version 220422 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:23,886][626795] Updated weights for policy 0, policy_version 220432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:23,976][24592] Fps is (10 sec: 45876.1, 60 sec: 44373.2, 300 sec: 43514.8). Total num frames: 1805778944. Throughput: 0: 10962.9. Samples: 201442578. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:23,976][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:25,721][626795] Updated weights for policy 0, policy_version 220442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:28,527][626795] Updated weights for policy 0, policy_version 220452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:28,975][24592] Fps is (10 sec: 40959.9, 60 sec: 43554.1, 300 sec: 43320.7). Total num frames: 1805959168. Throughput: 0: 10786.3. Samples: 201469248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:28,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:30,331][626795] Updated weights for policy 0, policy_version 220462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:32,035][626795] Updated weights for policy 0, policy_version 220472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:33,975][24592] Fps is (10 sec: 40141.5, 60 sec: 43417.7, 300 sec: 43264.9). Total num frames: 1806180352. Throughput: 0: 10710.3. Samples: 201532764. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:33,977][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:33,990][626795] Updated weights for policy 0, policy_version 220482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:35,677][626795] Updated weights for policy 0, policy_version 220492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:37,460][626795] Updated weights for policy 0, policy_version 220502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:38,992][24592] Fps is (10 sec: 45799.8, 60 sec: 43542.2, 300 sec: 43234.7). Total num frames: 1806417920. Throughput: 0: 11046.1. Samples: 201602532. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:38,993][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:39,222][626795] Updated weights for policy 0, policy_version 220512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:41,158][626795] Updated weights for policy 0, policy_version 220522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:42,855][626795] Updated weights for policy 0, policy_version 220532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:43,976][24592] Fps is (10 sec: 46692.2, 60 sec: 43553.9, 300 sec: 43209.3). Total num frames: 1806647296. Throughput: 0: 11095.9. Samples: 201636144. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:43,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:44,682][626795] Updated weights for policy 0, policy_version 220542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:46,418][626795] Updated weights for policy 0, policy_version 220552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:48,182][626795] Updated weights for policy 0, policy_version 220562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:48,975][24592] Fps is (10 sec: 45950.9, 60 sec: 43554.1, 300 sec: 43690.8). Total num frames: 1806876672. Throughput: 0: 11137.5. Samples: 201705180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:48,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:50,043][626795] Updated weights for policy 0, policy_version 220572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:51,865][626795] Updated weights for policy 0, policy_version 220582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:53,680][626795] Updated weights for policy 0, policy_version 220592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:53,976][24592] Fps is (10 sec: 45056.0, 60 sec: 44509.5, 300 sec: 43653.6). Total num frames: 1807097856. Throughput: 0: 11159.8. Samples: 201772890. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:53,977][24592] Avg episode reward: [(0, '5.077')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:55,422][626795] Updated weights for policy 0, policy_version 220602 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:57,259][626795] Updated weights for policy 0, policy_version 220612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:58,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44783.1, 300 sec: 43625.9). Total num frames: 1807327232. Throughput: 0: 11161.8. Samples: 201806814. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:33:58,977][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:33:59,086][626795] Updated weights for policy 0, policy_version 220622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:01,028][626772] Signal inference workers to stop experience collection... (2750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:01,032][626772] Signal inference workers to resume experience collection... (2750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:01,046][626795] InferenceWorker_p0-w0: stopping experience collection (2750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:01,050][626795] InferenceWorker_p0-w0: resuming experience collection (2750 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:02,299][626795] Updated weights for policy 0, policy_version 220632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:03,876][626795] Updated weights for policy 0, policy_version 220642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:03,975][24592] Fps is (10 sec: 40142.7, 60 sec: 43827.4, 300 sec: 43376.0). Total num frames: 1807499264. Throughput: 0: 10810.9. Samples: 201860562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:03,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:05,679][626795] Updated weights for policy 0, policy_version 220652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:07,390][626795] Updated weights for policy 0, policy_version 220662 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:08,976][24592] Fps is (10 sec: 40139.7, 60 sec: 43963.6, 300 sec: 43348.1). Total num frames: 1807728640. Throughput: 0: 10831.3. Samples: 201929988. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:08,977][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:09,331][626795] Updated weights for policy 0, policy_version 220672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:11,030][626795] Updated weights for policy 0, policy_version 220682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:12,775][626795] Updated weights for policy 0, policy_version 220692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:13,975][24592] Fps is (10 sec: 45874.8, 60 sec: 43963.9, 300 sec: 43320.4). Total num frames: 1807958016. Throughput: 0: 11009.2. Samples: 201964662. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:13,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:14,609][626795] Updated weights for policy 0, policy_version 220702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:16,477][626795] Updated weights for policy 0, policy_version 220712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:18,291][626795] Updated weights for policy 0, policy_version 220722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:18,975][24592] Fps is (10 sec: 45057.2, 60 sec: 43827.2, 300 sec: 43237.1). Total num frames: 1808179200. Throughput: 0: 11095.1. Samples: 202032042. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:18,976][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:20,012][626795] Updated weights for policy 0, policy_version 220732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:21,765][626795] Updated weights for policy 0, policy_version 220742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:23,707][626795] Updated weights for policy 0, policy_version 220752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:23,977][24592] Fps is (10 sec: 45051.1, 60 sec: 43826.5, 300 sec: 43699.1). Total num frames: 1808408576. Throughput: 0: 11073.4. Samples: 202100664. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:23,980][24592] Avg episode reward: [(0, '5.049')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:25,575][626795] Updated weights for policy 0, policy_version 220762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:27,270][626795] Updated weights for policy 0, policy_version 220772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:28,977][24592] Fps is (10 sec: 45867.7, 60 sec: 44645.2, 300 sec: 43708.9). Total num frames: 1808637952. Throughput: 0: 11063.3. Samples: 202134006. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:28,978][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:29,117][626795] Updated weights for policy 0, policy_version 220782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:30,991][626795] Updated weights for policy 0, policy_version 220792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:34,111][24592] Fps is (10 sec: 38801.2, 60 sec: 43592.4, 300 sec: 43411.6). Total num frames: 1808801792. Throughput: 0: 11004.8. Samples: 202201884. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:34,112][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:34,127][626795] Updated weights for policy 0, policy_version 220802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:35,892][626795] Updated weights for policy 0, policy_version 220812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:37,566][626795] Updated weights for policy 0, policy_version 220822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:38,976][24592] Fps is (10 sec: 39326.6, 60 sec: 43565.8, 300 sec: 43403.7). Total num frames: 1809031168. Throughput: 0: 10748.6. Samples: 202256574. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:38,979][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:39,275][626795] Updated weights for policy 0, policy_version 220832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:41,203][626795] Updated weights for policy 0, policy_version 220842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:43,030][626795] Updated weights for policy 0, policy_version 220852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:43,975][24592] Fps is (10 sec: 46504.4, 60 sec: 43554.5, 300 sec: 43431.5). Total num frames: 1809260544. Throughput: 0: 10745.1. Samples: 202290342. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:43,977][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:44,764][626795] Updated weights for policy 0, policy_version 220862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:46,480][626795] Updated weights for policy 0, policy_version 220872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:48,250][626795] Updated weights for policy 0, policy_version 220882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:48,976][24592] Fps is (10 sec: 45874.6, 60 sec: 43553.8, 300 sec: 43459.2). Total num frames: 1809489920. Throughput: 0: 11082.9. Samples: 202359300. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:48,979][24592] Avg episode reward: [(0, '4.978')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:50,160][626795] Updated weights for policy 0, policy_version 220892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:52,021][626795] Updated weights for policy 0, policy_version 220902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:53,608][626795] Updated weights for policy 0, policy_version 220912 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:53,975][24592] Fps is (10 sec: 45875.1, 60 sec: 43691.0, 300 sec: 43514.8). Total num frames: 1809719296. Throughput: 0: 11062.7. Samples: 202427808. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:53,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:55,613][626795] Updated weights for policy 0, policy_version 220922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:57,345][626795] Updated weights for policy 0, policy_version 220932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:58,976][24592] Fps is (10 sec: 45875.3, 60 sec: 43690.3, 300 sec: 43783.9). Total num frames: 1809948672. Throughput: 0: 11035.9. Samples: 202461282. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:34:58,978][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:34:59,143][626795] Updated weights for policy 0, policy_version 220942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:00,925][626795] Updated weights for policy 0, policy_version 220952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:02,813][626795] Updated weights for policy 0, policy_version 220962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:03,975][24592] Fps is (10 sec: 45056.2, 60 sec: 44509.9, 300 sec: 43792.5). Total num frames: 1810169856. Throughput: 0: 11055.3. Samples: 202529532. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:03,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:04,035][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000220969_1810178048.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:04,086][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000219688_1799684096.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:04,551][626795] Updated weights for policy 0, policy_version 220972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:06,340][626795] Updated weights for policy 0, policy_version 220982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:08,975][24592] Fps is (10 sec: 39323.3, 60 sec: 43554.3, 300 sec: 43625.9). Total num frames: 1810341888. Throughput: 0: 10717.7. Samples: 202582950. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:08,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:09,416][626795] Updated weights for policy 0, policy_version 220992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:11,172][626795] Updated weights for policy 0, policy_version 221002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:12,947][626795] Updated weights for policy 0, policy_version 221012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 43690.7, 300 sec: 43709.2). Total num frames: 1810579456. Throughput: 0: 10768.8. Samples: 202618584. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:13,976][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:14,710][626795] Updated weights for policy 0, policy_version 221022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:16,551][626795] Updated weights for policy 0, policy_version 221032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:18,187][626795] Updated weights for policy 0, policy_version 221042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:18,975][24592] Fps is (10 sec: 46694.4, 60 sec: 43827.2, 300 sec: 43681.4). Total num frames: 1810808832. Throughput: 0: 10820.3. Samples: 202687332. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:18,977][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:20,036][626795] Updated weights for policy 0, policy_version 221052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:21,772][626795] Updated weights for policy 0, policy_version 221062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:23,639][626795] Updated weights for policy 0, policy_version 221072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:23,975][24592] Fps is (10 sec: 45056.1, 60 sec: 43691.5, 300 sec: 43653.6). Total num frames: 1811030016. Throughput: 0: 11113.3. Samples: 202756668. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:23,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:25,430][626795] Updated weights for policy 0, policy_version 221082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:27,116][626795] Updated weights for policy 0, policy_version 221092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:28,940][626795] Updated weights for policy 0, policy_version 221102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:28,976][24592] Fps is (10 sec: 45873.8, 60 sec: 43828.2, 300 sec: 43681.4). Total num frames: 1811267584. Throughput: 0: 11137.1. Samples: 202791516. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:28,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:30,744][626795] Updated weights for policy 0, policy_version 221112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:32,519][626795] Updated weights for policy 0, policy_version 221122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:33,975][24592] Fps is (10 sec: 46694.0, 60 sec: 45020.9, 300 sec: 43925.6). Total num frames: 1811496960. Throughput: 0: 11127.7. Samples: 202860042. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:33,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:34,304][626795] Updated weights for policy 0, policy_version 221132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:36,152][626795] Updated weights for policy 0, policy_version 221142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:37,858][626795] Updated weights for policy 0, policy_version 221152 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:38,975][24592] Fps is (10 sec: 45057.6, 60 sec: 44783.2, 300 sec: 43903.6). Total num frames: 1811718144. Throughput: 0: 11110.9. Samples: 202927800. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:38,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:39,680][626795] Updated weights for policy 0, policy_version 221162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:42,819][626795] Updated weights for policy 0, policy_version 221172 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:43,975][24592] Fps is (10 sec: 39321.6, 60 sec: 43827.2, 300 sec: 43654.4). Total num frames: 1811890176. Throughput: 0: 10785.6. Samples: 202946628. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:43,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:44,522][626795] Updated weights for policy 0, policy_version 221182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:46,398][626795] Updated weights for policy 0, policy_version 221192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:48,223][626795] Updated weights for policy 0, policy_version 221202 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:48,975][24592] Fps is (10 sec: 40140.4, 60 sec: 43827.5, 300 sec: 43681.4). Total num frames: 1812119552. Throughput: 0: 10814.5. Samples: 203016186. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:48,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:50,073][626795] Updated weights for policy 0, policy_version 221212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:51,747][626795] Updated weights for policy 0, policy_version 221222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:53,573][626795] Updated weights for policy 0, policy_version 221232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:53,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43827.2, 300 sec: 43681.4). Total num frames: 1812348928. Throughput: 0: 11150.5. Samples: 203084724. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:53,976][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:55,446][626795] Updated weights for policy 0, policy_version 221242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:57,158][626795] Updated weights for policy 0, policy_version 221252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:58,975][24592] Fps is (10 sec: 45056.5, 60 sec: 43691.0, 300 sec: 43681.4). Total num frames: 1812570112. Throughput: 0: 11098.7. Samples: 203118024. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:35:58,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:35:59,061][626795] Updated weights for policy 0, policy_version 221262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:00,727][626795] Updated weights for policy 0, policy_version 221272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:02,636][626795] Updated weights for policy 0, policy_version 221282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:03,976][24592] Fps is (10 sec: 45053.8, 60 sec: 43826.8, 300 sec: 43709.1). Total num frames: 1812799488. Throughput: 0: 11093.5. Samples: 203186544. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:03,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:04,418][626795] Updated weights for policy 0, policy_version 221292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:06,149][626795] Updated weights for policy 0, policy_version 221302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:07,979][626795] Updated weights for policy 0, policy_version 221312 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:08,975][24592] Fps is (10 sec: 45874.7, 60 sec: 44782.9, 300 sec: 43986.9). Total num frames: 1813028864. Throughput: 0: 11089.0. Samples: 203255676. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:08,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:09,806][626795] Updated weights for policy 0, policy_version 221322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:11,523][626795] Updated weights for policy 0, policy_version 221332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:13,445][626795] Updated weights for policy 0, policy_version 221342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:14,811][24592] Fps is (10 sec: 40070.6, 60 sec: 43629.0, 300 sec: 43807.2). Total num frames: 1813233664. Throughput: 0: 10866.5. Samples: 203289588. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:14,812][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:16,425][626795] Updated weights for policy 0, policy_version 221352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:18,166][626795] Updated weights for policy 0, policy_version 221362 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 43690.7, 300 sec: 43848.0). Total num frames: 1813430272. Throughput: 0: 10764.7. Samples: 203344452. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:18,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:19,922][626795] Updated weights for policy 0, policy_version 221372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:21,762][626795] Updated weights for policy 0, policy_version 221382 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:23,649][626795] Updated weights for policy 0, policy_version 221392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:23,975][24592] Fps is (10 sec: 45589.6, 60 sec: 43690.6, 300 sec: 43820.3). Total num frames: 1813651456. Throughput: 0: 10760.1. Samples: 203412006. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:23,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:25,399][626795] Updated weights for policy 0, policy_version 221402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:27,116][626795] Updated weights for policy 0, policy_version 221412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:28,872][626795] Updated weights for policy 0, policy_version 221422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:28,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43690.9, 300 sec: 43848.0). Total num frames: 1813889024. Throughput: 0: 11124.9. Samples: 203447250. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:28,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:30,724][626795] Updated weights for policy 0, policy_version 221432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:32,574][626795] Updated weights for policy 0, policy_version 221442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:33,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43690.7, 300 sec: 43848.0). Total num frames: 1814118400. Throughput: 0: 11106.7. Samples: 203515986. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:33,977][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:34,277][626795] Updated weights for policy 0, policy_version 221452 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:36,024][626795] Updated weights for policy 0, policy_version 221462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:37,889][626795] Updated weights for policy 0, policy_version 221472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:38,987][24592] Fps is (10 sec: 45005.8, 60 sec: 43682.5, 300 sec: 43818.6). Total num frames: 1814339584. Throughput: 0: 11092.6. Samples: 203584014. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:38,988][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:39,702][626795] Updated weights for policy 0, policy_version 221482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:41,450][626795] Updated weights for policy 0, policy_version 221492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:43,283][626795] Updated weights for policy 0, policy_version 221502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:43,975][24592] Fps is (10 sec: 45056.1, 60 sec: 44646.4, 300 sec: 44014.7). Total num frames: 1814568960. Throughput: 0: 11128.8. Samples: 203618820. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:45,080][626795] Updated weights for policy 0, policy_version 221512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:46,849][626795] Updated weights for policy 0, policy_version 221522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:48,976][24592] Fps is (10 sec: 40184.7, 60 sec: 43690.5, 300 sec: 43820.2). Total num frames: 1814740992. Throughput: 0: 10975.1. Samples: 203680422. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:48,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:49,877][626795] Updated weights for policy 0, policy_version 221532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:51,605][626795] Updated weights for policy 0, policy_version 221542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:53,480][626795] Updated weights for policy 0, policy_version 221552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:53,975][24592] Fps is (10 sec: 40960.0, 60 sec: 43827.2, 300 sec: 43875.8). Total num frames: 1814978560. Throughput: 0: 10797.7. Samples: 203741574. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:53,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:55,403][626795] Updated weights for policy 0, policy_version 221562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:57,117][626795] Updated weights for policy 0, policy_version 221572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:36:58,840][626795] Updated weights for policy 0, policy_version 221582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:58,975][24592] Fps is (10 sec: 46695.3, 60 sec: 43963.7, 300 sec: 43875.8). Total num frames: 1815207936. Throughput: 0: 10978.8. Samples: 203774460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:36:58,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:00,745][626795] Updated weights for policy 0, policy_version 221592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:02,454][626795] Updated weights for policy 0, policy_version 221602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:03,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43827.5, 300 sec: 43848.0). Total num frames: 1815429120. Throughput: 0: 11090.3. Samples: 203843514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:03,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000221610_1815429120.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000220321_1804869632.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:04,352][626795] Updated weights for policy 0, policy_version 221612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:06,095][626795] Updated weights for policy 0, policy_version 221622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:07,769][626795] Updated weights for policy 0, policy_version 221632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:08,975][24592] Fps is (10 sec: 45874.8, 60 sec: 43963.7, 300 sec: 43875.8). Total num frames: 1815666688. Throughput: 0: 11131.8. Samples: 203912940. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:08,984][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:09,671][626795] Updated weights for policy 0, policy_version 221642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:11,571][626795] Updated weights for policy 0, policy_version 221652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:13,298][626795] Updated weights for policy 0, policy_version 221662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:13,975][24592] Fps is (10 sec: 45875.2, 60 sec: 44861.7, 300 sec: 44040.4). Total num frames: 1815887872. Throughput: 0: 11075.6. Samples: 203945652. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:13,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:15,028][626795] Updated weights for policy 0, policy_version 221672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:16,796][626795] Updated weights for policy 0, policy_version 221682 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:18,629][626795] Updated weights for policy 0, policy_version 221692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:18,975][24592] Fps is (10 sec: 44237.3, 60 sec: 44646.4, 300 sec: 44042.4). Total num frames: 1816109056. Throughput: 0: 11086.0. Samples: 204014856. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:18,976][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:20,490][626795] Updated weights for policy 0, policy_version 221702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:23,431][626795] Updated weights for policy 0, policy_version 221712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:23,976][24592] Fps is (10 sec: 40139.9, 60 sec: 43963.5, 300 sec: 43875.8). Total num frames: 1816289280. Throughput: 0: 10784.9. Samples: 204069216. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:23,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:25,299][626795] Updated weights for policy 0, policy_version 221722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:27,085][626795] Updated weights for policy 0, policy_version 221732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:28,855][626795] Updated weights for policy 0, policy_version 221742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:28,976][24592] Fps is (10 sec: 40140.1, 60 sec: 43690.5, 300 sec: 43848.0). Total num frames: 1816510464. Throughput: 0: 10761.8. Samples: 204103104. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:28,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:30,566][626795] Updated weights for policy 0, policy_version 221752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:32,396][626795] Updated weights for policy 0, policy_version 221762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:33,975][24592] Fps is (10 sec: 45876.6, 60 sec: 43827.3, 300 sec: 43875.8). Total num frames: 1816748032. Throughput: 0: 10932.7. Samples: 204172392. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:33,978][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:34,162][626795] Updated weights for policy 0, policy_version 221772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:36,049][626795] Updated weights for policy 0, policy_version 221782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:37,837][626795] Updated weights for policy 0, policy_version 221792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:38,975][24592] Fps is (10 sec: 45876.1, 60 sec: 43835.4, 300 sec: 43848.0). Total num frames: 1816969216. Throughput: 0: 11072.4. Samples: 204239832. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:38,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:39,563][626795] Updated weights for policy 0, policy_version 221802 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:41,369][626795] Updated weights for policy 0, policy_version 221812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:43,112][626795] Updated weights for policy 0, policy_version 221822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:43,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43827.2, 300 sec: 43848.0). Total num frames: 1817198592. Throughput: 0: 11120.8. Samples: 204274896. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:43,976][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:44,909][626795] Updated weights for policy 0, policy_version 221832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:46,760][626795] Updated weights for policy 0, policy_version 221842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:48,535][626795] Updated weights for policy 0, policy_version 221852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:48,976][24592] Fps is (10 sec: 45872.2, 60 sec: 44782.6, 300 sec: 44070.1). Total num frames: 1817427968. Throughput: 0: 11093.9. Samples: 204342744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:48,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:50,279][626795] Updated weights for policy 0, policy_version 221862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:52,198][626795] Updated weights for policy 0, policy_version 221872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:53,975][24592] Fps is (10 sec: 45055.9, 60 sec: 44509.8, 300 sec: 44098.0). Total num frames: 1817649152. Throughput: 0: 11081.2. Samples: 204411594. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:53,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:53,988][626795] Updated weights for policy 0, policy_version 221882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:56,948][626795] Updated weights for policy 0, policy_version 221892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:37:58,721][626795] Updated weights for policy 0, policy_version 221902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:58,975][24592] Fps is (10 sec: 40143.4, 60 sec: 43690.7, 300 sec: 43931.4). Total num frames: 1817829376. Throughput: 0: 10794.1. Samples: 204431388. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:37:58,976][24592] Avg episode reward: [(0, '5.218')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:00,593][626795] Updated weights for policy 0, policy_version 221912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:02,395][626795] Updated weights for policy 0, policy_version 221922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:03,975][24592] Fps is (10 sec: 40140.9, 60 sec: 43690.7, 300 sec: 43931.4). Total num frames: 1818050560. Throughput: 0: 10770.4. Samples: 204499524. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:03,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:04,113][626795] Updated weights for policy 0, policy_version 221932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:05,850][626795] Updated weights for policy 0, policy_version 221942 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:07,762][626795] Updated weights for policy 0, policy_version 221952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:08,975][24592] Fps is (10 sec: 45055.9, 60 sec: 43554.2, 300 sec: 43931.4). Total num frames: 1818279936. Throughput: 0: 11111.5. Samples: 204569232. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:08,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:09,502][626795] Updated weights for policy 0, policy_version 221962 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:11,295][626795] Updated weights for policy 0, policy_version 221972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:13,083][626795] Updated weights for policy 0, policy_version 221982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 43827.2, 300 sec: 43959.1). Total num frames: 1818517504. Throughput: 0: 11097.6. Samples: 204602496. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:13,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:14,792][626795] Updated weights for policy 0, policy_version 221992 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:16,674][626795] Updated weights for policy 0, policy_version 222002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:18,477][626795] Updated weights for policy 0, policy_version 222012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:18,975][24592] Fps is (10 sec: 45874.9, 60 sec: 43827.1, 300 sec: 43931.3). Total num frames: 1818738688. Throughput: 0: 11098.6. Samples: 204671832. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:18,976][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:20,306][626795] Updated weights for policy 0, policy_version 222022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:22,011][626795] Updated weights for policy 0, policy_version 222032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:23,856][626795] Updated weights for policy 0, policy_version 222042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:23,976][24592] Fps is (10 sec: 45054.2, 60 sec: 44646.3, 300 sec: 44097.9). Total num frames: 1818968064. Throughput: 0: 11121.0. Samples: 204740280. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:23,976][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:25,797][626795] Updated weights for policy 0, policy_version 222052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:27,427][626795] Updated weights for policy 0, policy_version 222062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:29,042][24592] Fps is (10 sec: 39874.2, 60 sec: 43778.5, 300 sec: 43921.4). Total num frames: 1819140096. Throughput: 0: 11077.4. Samples: 204774120. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:29,044][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:30,649][626795] Updated weights for policy 0, policy_version 222072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:32,327][626795] Updated weights for policy 0, policy_version 222082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:33,975][24592] Fps is (10 sec: 40142.5, 60 sec: 43690.6, 300 sec: 43906.0). Total num frames: 1819369472. Throughput: 0: 10789.5. Samples: 204828264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:33,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:34,120][626795] Updated weights for policy 0, policy_version 222092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:35,983][626795] Updated weights for policy 0, policy_version 222102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:37,672][626795] Updated weights for policy 0, policy_version 222112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:38,975][24592] Fps is (10 sec: 46184.1, 60 sec: 43827.1, 300 sec: 43903.6). Total num frames: 1819598848. Throughput: 0: 10801.7. Samples: 204897672. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:38,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:39,421][626795] Updated weights for policy 0, policy_version 222122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:41,339][626795] Updated weights for policy 0, policy_version 222132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:43,001][626795] Updated weights for policy 0, policy_version 222142 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:43,975][24592] Fps is (10 sec: 45874.8, 60 sec: 43827.1, 300 sec: 43903.6). Total num frames: 1819828224. Throughput: 0: 11113.4. Samples: 204931494. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:43,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:44,888][626795] Updated weights for policy 0, policy_version 222152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:46,517][626795] Updated weights for policy 0, policy_version 222162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:48,359][626795] Updated weights for policy 0, policy_version 222172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:48,975][24592] Fps is (10 sec: 45875.6, 60 sec: 43827.7, 300 sec: 43931.4). Total num frames: 1820057600. Throughput: 0: 11141.5. Samples: 205000890. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:48,976][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:50,178][626795] Updated weights for policy 0, policy_version 222182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:52,036][626795] Updated weights for policy 0, policy_version 222192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:53,760][626795] Updated weights for policy 0, policy_version 222202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:53,975][24592] Fps is (10 sec: 45056.4, 60 sec: 43827.2, 300 sec: 43903.6). Total num frames: 1820278784. Throughput: 0: 11092.7. Samples: 205068402. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:53,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:55,659][626795] Updated weights for policy 0, policy_version 222212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:57,410][626795] Updated weights for policy 0, policy_version 222222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:58,976][24592] Fps is (10 sec: 45052.4, 60 sec: 44645.8, 300 sec: 44097.8). Total num frames: 1820508160. Throughput: 0: 11111.9. Samples: 205102542. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:38:58,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:38:59,287][626795] Updated weights for policy 0, policy_version 222232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:01,015][626795] Updated weights for policy 0, policy_version 222242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:03,975][24592] Fps is (10 sec: 40140.7, 60 sec: 43827.2, 300 sec: 43903.6). Total num frames: 1820680192. Throughput: 0: 10741.9. Samples: 205155216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:03,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000222251_1820680192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:04,039][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000220969_1810178048.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:04,123][626795] Updated weights for policy 0, policy_version 222252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:05,919][626795] Updated weights for policy 0, policy_version 222262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:07,770][626795] Updated weights for policy 0, policy_version 222272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:08,976][24592] Fps is (10 sec: 40143.3, 60 sec: 43827.1, 300 sec: 43903.5). Total num frames: 1820909568. Throughput: 0: 10759.0. Samples: 205224432. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:08,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:09,480][626795] Updated weights for policy 0, policy_version 222282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:11,250][626795] Updated weights for policy 0, policy_version 222292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:12,983][626795] Updated weights for policy 0, policy_version 222302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:13,975][24592] Fps is (10 sec: 45874.9, 60 sec: 43690.6, 300 sec: 43931.3). Total num frames: 1821138944. Throughput: 0: 10807.9. Samples: 205259754. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:13,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:14,803][626795] Updated weights for policy 0, policy_version 222312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:16,518][626795] Updated weights for policy 0, policy_version 222322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:18,395][626795] Updated weights for policy 0, policy_version 222332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:18,975][24592] Fps is (10 sec: 45875.8, 60 sec: 43827.2, 300 sec: 43931.5). Total num frames: 1821368320. Throughput: 0: 11111.2. Samples: 205328268. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:18,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:20,158][626795] Updated weights for policy 0, policy_version 222342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:21,884][626795] Updated weights for policy 0, policy_version 222352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:23,756][626795] Updated weights for policy 0, policy_version 222362 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:23,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43827.5, 300 sec: 43931.6). Total num frames: 1821597696. Throughput: 0: 11091.6. Samples: 205396794. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:23,978][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:25,578][626795] Updated weights for policy 0, policy_version 222372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:27,350][626795] Updated weights for policy 0, policy_version 222382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:28,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44832.9, 300 sec: 44173.7). Total num frames: 1821827072. Throughput: 0: 11089.6. Samples: 205430526. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:28,977][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:29,121][626795] Updated weights for policy 0, policy_version 222392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:30,993][626795] Updated weights for policy 0, policy_version 222402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:32,818][626795] Updated weights for policy 0, policy_version 222412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:33,975][24592] Fps is (10 sec: 45056.1, 60 sec: 44646.4, 300 sec: 44125.8). Total num frames: 1822048256. Throughput: 0: 11058.8. Samples: 205498536. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:33,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:34,629][626795] Updated weights for policy 0, policy_version 222422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:37,798][626795] Updated weights for policy 0, policy_version 222432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:38,975][24592] Fps is (10 sec: 39321.8, 60 sec: 43690.7, 300 sec: 43931.3). Total num frames: 1822220288. Throughput: 0: 10746.4. Samples: 205551990. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:38,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:39,498][626795] Updated weights for policy 0, policy_version 222442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:41,367][626795] Updated weights for policy 0, policy_version 222452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:43,249][626795] Updated weights for policy 0, policy_version 222462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:43,977][24592] Fps is (10 sec: 39313.9, 60 sec: 43552.8, 300 sec: 43903.3). Total num frames: 1822441472. Throughput: 0: 10731.4. Samples: 205585470. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:43,978][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:44,793][626795] Updated weights for policy 0, policy_version 222472 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:46,594][626795] Updated weights for policy 0, policy_version 222482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:48,498][626795] Updated weights for policy 0, policy_version 222492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:48,975][24592] Fps is (10 sec: 45875.1, 60 sec: 43690.7, 300 sec: 43931.3). Total num frames: 1822679040. Throughput: 0: 11115.5. Samples: 205655412. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:48,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:50,227][626795] Updated weights for policy 0, policy_version 222502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:52,021][626795] Updated weights for policy 0, policy_version 222512 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:53,697][626795] Updated weights for policy 0, policy_version 222522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:53,979][24592] Fps is (10 sec: 46687.5, 60 sec: 43824.7, 300 sec: 43930.9). Total num frames: 1822908416. Throughput: 0: 11118.9. Samples: 205724820. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:53,980][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:55,632][626795] Updated weights for policy 0, policy_version 222532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:57,341][626795] Updated weights for policy 0, policy_version 222542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:58,975][24592] Fps is (10 sec: 45055.4, 60 sec: 43691.1, 300 sec: 43931.3). Total num frames: 1823129600. Throughput: 0: 11071.7. Samples: 205757982. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:39:58,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:39:59,231][626795] Updated weights for policy 0, policy_version 222552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:01,079][626795] Updated weights for policy 0, policy_version 222562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:02,755][626795] Updated weights for policy 0, policy_version 222572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:03,976][24592] Fps is (10 sec: 45068.9, 60 sec: 44645.9, 300 sec: 44125.6). Total num frames: 1823358976. Throughput: 0: 11070.3. Samples: 205826436. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:03,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:04,564][626795] Updated weights for policy 0, policy_version 222582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:06,417][626795] Updated weights for policy 0, policy_version 222592 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:08,208][626795] Updated weights for policy 0, policy_version 222602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:09,694][24592] Fps is (10 sec: 39742.0, 60 sec: 43578.3, 300 sec: 43879.9). Total num frames: 1823555584. Throughput: 0: 10132.9. Samples: 205860060. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:09,696][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:11,180][626795] Updated weights for policy 0, policy_version 222612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:13,030][626795] Updated weights for policy 0, policy_version 222622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:13,975][24592] Fps is (10 sec: 40143.2, 60 sec: 43690.7, 300 sec: 43903.6). Total num frames: 1823760384. Throughput: 0: 10758.1. Samples: 205914642. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:13,976][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:14,900][626795] Updated weights for policy 0, policy_version 222632 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:16,681][626795] Updated weights for policy 0, policy_version 222642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:18,492][626795] Updated weights for policy 0, policy_version 222652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:18,975][24592] Fps is (10 sec: 45898.1, 60 sec: 43554.2, 300 sec: 43903.6). Total num frames: 1823981568. Throughput: 0: 10751.9. Samples: 205982370. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:18,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:20,261][626795] Updated weights for policy 0, policy_version 222662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:21,977][626795] Updated weights for policy 0, policy_version 222672 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:23,846][626795] Updated weights for policy 0, policy_version 222682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:23,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43554.1, 300 sec: 43875.8). Total num frames: 1824210944. Throughput: 0: 11097.7. Samples: 206051388. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:23,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:25,606][626795] Updated weights for policy 0, policy_version 222692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:27,303][626795] Updated weights for policy 0, policy_version 222702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:27,963][626772] Signal inference workers to stop experience collection... (2800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:27,963][626772] Signal inference workers to resume experience collection... (2800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:27,978][626795] InferenceWorker_p0-w0: stopping experience collection (2800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:27,981][626795] InferenceWorker_p0-w0: resuming experience collection (2800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:28,975][24592] Fps is (10 sec: 46694.0, 60 sec: 43690.7, 300 sec: 43903.6). Total num frames: 1824448512. Throughput: 0: 11132.2. Samples: 206086398. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:28,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:29,193][626795] Updated weights for policy 0, policy_version 222712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:30,973][626795] Updated weights for policy 0, policy_version 222722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:32,792][626795] Updated weights for policy 0, policy_version 222732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:33,975][24592] Fps is (10 sec: 45875.5, 60 sec: 43690.7, 300 sec: 43903.6). Total num frames: 1824669696. Throughput: 0: 11081.7. Samples: 206154090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:33,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:34,521][626795] Updated weights for policy 0, policy_version 222742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:36,298][626795] Updated weights for policy 0, policy_version 222752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:38,018][626795] Updated weights for policy 0, policy_version 222762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:38,976][24592] Fps is (10 sec: 45053.9, 60 sec: 44646.0, 300 sec: 44097.9). Total num frames: 1824899072. Throughput: 0: 11083.3. Samples: 206223534. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:38,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:39,909][626795] Updated weights for policy 0, policy_version 222772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:41,797][626795] Updated weights for policy 0, policy_version 222782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:43,975][24592] Fps is (10 sec: 40959.6, 60 sec: 43965.1, 300 sec: 43931.3). Total num frames: 1825079296. Throughput: 0: 11091.6. Samples: 206257104. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:43,976][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:44,609][626795] Updated weights for policy 0, policy_version 222792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:46,533][626795] Updated weights for policy 0, policy_version 222802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:48,262][626795] Updated weights for policy 0, policy_version 222812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:48,975][24592] Fps is (10 sec: 40962.0, 60 sec: 43827.2, 300 sec: 43931.3). Total num frames: 1825308672. Throughput: 0: 10802.8. Samples: 206312556. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:48,976][24592] Avg episode reward: [(0, '4.405')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:50,286][626795] Updated weights for policy 0, policy_version 222822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:51,993][626795] Updated weights for policy 0, policy_version 222832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:53,704][626795] Updated weights for policy 0, policy_version 222842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:53,975][24592] Fps is (10 sec: 44237.4, 60 sec: 43556.7, 300 sec: 43903.6). Total num frames: 1825521664. Throughput: 0: 11746.5. Samples: 206380206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:53,981][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:55,674][626795] Updated weights for policy 0, policy_version 222852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:57,437][626795] Updated weights for policy 0, policy_version 222862 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:58,975][24592] Fps is (10 sec: 44236.6, 60 sec: 43690.7, 300 sec: 43903.6). Total num frames: 1825751040. Throughput: 0: 11063.6. Samples: 206412504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:40:58,976][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:40:59,174][626795] Updated weights for policy 0, policy_version 222872 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:01,002][626795] Updated weights for policy 0, policy_version 222882 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:02,728][626795] Updated weights for policy 0, policy_version 222892 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:03,975][24592] Fps is (10 sec: 45875.0, 60 sec: 43691.1, 300 sec: 43903.6). Total num frames: 1825980416. Throughput: 0: 11106.4. Samples: 206482158. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:03,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000222899_1825988608.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:04,112][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000221610_1815429120.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:04,666][626795] Updated weights for policy 0, policy_version 222902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:06,547][626795] Updated weights for policy 0, policy_version 222912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:08,119][626795] Updated weights for policy 0, policy_version 222922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:08,975][24592] Fps is (10 sec: 45875.1, 60 sec: 44773.2, 300 sec: 44111.8). Total num frames: 1826209792. Throughput: 0: 11088.5. Samples: 206550372. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:08,976][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:09,945][626795] Updated weights for policy 0, policy_version 222932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:11,823][626795] Updated weights for policy 0, policy_version 222942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:13,576][626795] Updated weights for policy 0, policy_version 222952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:13,976][24592] Fps is (10 sec: 45873.1, 60 sec: 44646.1, 300 sec: 44097.9). Total num frames: 1826439168. Throughput: 0: 11058.2. Samples: 206584020. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:13,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:15,349][626795] Updated weights for policy 0, policy_version 222962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:18,352][626795] Updated weights for policy 0, policy_version 222972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:18,975][24592] Fps is (10 sec: 40140.7, 60 sec: 43827.1, 300 sec: 43931.3). Total num frames: 1826611200. Throughput: 0: 10787.3. Samples: 206639520. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:18,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:20,148][626795] Updated weights for policy 0, policy_version 222982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:21,996][626795] Updated weights for policy 0, policy_version 222992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:23,819][626795] Updated weights for policy 0, policy_version 223002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:23,975][24592] Fps is (10 sec: 40142.5, 60 sec: 43827.2, 300 sec: 43903.6). Total num frames: 1826840576. Throughput: 0: 10749.3. Samples: 206707248. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:23,976][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:25,462][626795] Updated weights for policy 0, policy_version 223012 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:27,353][626795] Updated weights for policy 0, policy_version 223022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:28,975][24592] Fps is (10 sec: 45875.7, 60 sec: 43690.7, 300 sec: 43903.6). Total num frames: 1827069952. Throughput: 0: 10775.5. Samples: 206742000. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:28,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:29,246][626795] Updated weights for policy 0, policy_version 223032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:30,997][626795] Updated weights for policy 0, policy_version 223042 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:32,797][626795] Updated weights for policy 0, policy_version 223052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:33,976][24592] Fps is (10 sec: 45054.8, 60 sec: 43690.4, 300 sec: 43905.2). Total num frames: 1827291136. Throughput: 0: 11049.9. Samples: 206809806. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:33,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:34,657][626795] Updated weights for policy 0, policy_version 223062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:36,362][626795] Updated weights for policy 0, policy_version 223072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:38,074][626795] Updated weights for policy 0, policy_version 223082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:38,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43691.1, 300 sec: 43903.6). Total num frames: 1827520512. Throughput: 0: 11050.9. Samples: 206877498. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:38,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:40,019][626795] Updated weights for policy 0, policy_version 223092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:41,763][626795] Updated weights for policy 0, policy_version 223102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:43,530][626795] Updated weights for policy 0, policy_version 223112 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:43,976][24592] Fps is (10 sec: 45875.4, 60 sec: 44509.7, 300 sec: 44097.9). Total num frames: 1827749888. Throughput: 0: 11119.0. Samples: 206912862. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:43,978][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:45,421][626795] Updated weights for policy 0, policy_version 223122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:47,244][626795] Updated weights for policy 0, policy_version 223132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:49,950][24592] Fps is (10 sec: 41056.3, 60 sec: 43664.4, 300 sec: 43897.5). Total num frames: 1827971072. Throughput: 0: 10831.4. Samples: 206980122. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:49,951][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:50,148][626795] Updated weights for policy 0, policy_version 223142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:51,948][626795] Updated weights for policy 0, policy_version 223152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:53,804][626795] Updated weights for policy 0, policy_version 223162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:53,975][24592] Fps is (10 sec: 40141.9, 60 sec: 43827.2, 300 sec: 43875.8). Total num frames: 1828151296. Throughput: 0: 10769.2. Samples: 207034986. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:53,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:55,626][626795] Updated weights for policy 0, policy_version 223172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:57,390][626795] Updated weights for policy 0, policy_version 223182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:58,975][24592] Fps is (10 sec: 44473.2, 60 sec: 43690.7, 300 sec: 43875.8). Total num frames: 1828372480. Throughput: 0: 10765.7. Samples: 207068472. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:41:58,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:41:59,274][626795] Updated weights for policy 0, policy_version 223192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:01,054][626795] Updated weights for policy 0, policy_version 223202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:02,944][626795] Updated weights for policy 0, policy_version 223212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:03,975][24592] Fps is (10 sec: 44236.8, 60 sec: 43554.1, 300 sec: 43820.3). Total num frames: 1828593664. Throughput: 0: 11024.2. Samples: 207135606. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:03,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:04,720][626795] Updated weights for policy 0, policy_version 223222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:06,343][626795] Updated weights for policy 0, policy_version 223232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:08,226][626795] Updated weights for policy 0, policy_version 223242 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:08,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43690.7, 300 sec: 43875.8). Total num frames: 1828831232. Throughput: 0: 11075.6. Samples: 207205650. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:08,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:09,997][626795] Updated weights for policy 0, policy_version 223252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:11,878][626795] Updated weights for policy 0, policy_version 223262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:13,574][626795] Updated weights for policy 0, policy_version 223272 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:13,975][24592] Fps is (10 sec: 46694.4, 60 sec: 43691.0, 300 sec: 43903.6). Total num frames: 1829060608. Throughput: 0: 11051.7. Samples: 207239328. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:13,976][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:15,350][626795] Updated weights for policy 0, policy_version 223282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:17,160][626795] Updated weights for policy 0, policy_version 223292 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:18,975][24592] Fps is (10 sec: 45874.8, 60 sec: 44646.4, 300 sec: 44070.2). Total num frames: 1829289984. Throughput: 0: 11080.1. Samples: 207308406. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:18,977][626795] Updated weights for policy 0, policy_version 223302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:18,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:20,835][626795] Updated weights for policy 0, policy_version 223312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:23,628][626795] Updated weights for policy 0, policy_version 223322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:23,975][24592] Fps is (10 sec: 39321.6, 60 sec: 43554.2, 300 sec: 43875.8). Total num frames: 1829453824. Throughput: 0: 10797.9. Samples: 207363402. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:23,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:25,539][626795] Updated weights for policy 0, policy_version 223332 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:27,406][626795] Updated weights for policy 0, policy_version 223342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:28,975][24592] Fps is (10 sec: 39321.8, 60 sec: 43554.1, 300 sec: 43848.0). Total num frames: 1829683200. Throughput: 0: 10758.9. Samples: 207397008. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:28,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:29,257][626795] Updated weights for policy 0, policy_version 223352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:31,065][626795] Updated weights for policy 0, policy_version 223362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:32,683][626795] Updated weights for policy 0, policy_version 223372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:33,975][24592] Fps is (10 sec: 46694.4, 60 sec: 43827.4, 300 sec: 43903.6). Total num frames: 1829920768. Throughput: 0: 11022.6. Samples: 207465402. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:33,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:34,457][626795] Updated weights for policy 0, policy_version 223382 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:36,256][626795] Updated weights for policy 0, policy_version 223392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:37,813][626795] Updated weights for policy 0, policy_version 223402 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:38,975][24592] Fps is (10 sec: 48332.9, 60 sec: 44100.2, 300 sec: 43959.1). Total num frames: 1830166528. Throughput: 0: 11180.0. Samples: 207538086. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:38,978][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:39,517][626795] Updated weights for policy 0, policy_version 223412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:41,362][626795] Updated weights for policy 0, policy_version 223422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:43,108][626795] Updated weights for policy 0, policy_version 223432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:43,975][24592] Fps is (10 sec: 46693.9, 60 sec: 43963.9, 300 sec: 43931.4). Total num frames: 1830387712. Throughput: 0: 11202.1. Samples: 207572568. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:43,977][24592] Avg episode reward: [(0, '4.316')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:44,893][626795] Updated weights for policy 0, policy_version 223442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:46,764][626795] Updated weights for policy 0, policy_version 223452 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:48,613][626795] Updated weights for policy 0, policy_version 223462 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:48,976][24592] Fps is (10 sec: 45054.5, 60 sec: 44827.8, 300 sec: 43959.1). Total num frames: 1830617088. Throughput: 0: 11208.7. Samples: 207640002. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:48,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:50,408][626795] Updated weights for policy 0, policy_version 223472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:52,259][626795] Updated weights for policy 0, policy_version 223482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:53,964][626795] Updated weights for policy 0, policy_version 223492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:53,976][24592] Fps is (10 sec: 45875.0, 60 sec: 44919.4, 300 sec: 44125.7). Total num frames: 1830846464. Throughput: 0: 11165.4. Samples: 207708096. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:53,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:57,332][626795] Updated weights for policy 0, policy_version 223502 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:58,975][24592] Fps is (10 sec: 38503.4, 60 sec: 43827.2, 300 sec: 43903.6). Total num frames: 1831002112. Throughput: 0: 10927.3. Samples: 207731058. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:42:58,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:42:59,234][626795] Updated weights for policy 0, policy_version 223512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:00,945][626795] Updated weights for policy 0, policy_version 223522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:02,792][626795] Updated weights for policy 0, policy_version 223532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:03,975][24592] Fps is (10 sec: 37683.7, 60 sec: 43827.2, 300 sec: 43875.8). Total num frames: 1831223296. Throughput: 0: 10759.1. Samples: 207792564. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:03,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000223538_1831223296.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:04,045][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000222251_1820680192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:04,687][626795] Updated weights for policy 0, policy_version 223542 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:06,558][626795] Updated weights for policy 0, policy_version 223552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:08,323][626795] Updated weights for policy 0, policy_version 223562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:08,976][24592] Fps is (10 sec: 44236.0, 60 sec: 43553.9, 300 sec: 43820.2). Total num frames: 1831444480. Throughput: 0: 11020.7. Samples: 207859338. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:08,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:10,086][626795] Updated weights for policy 0, policy_version 223572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:11,957][626795] Updated weights for policy 0, policy_version 223582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:13,603][626795] Updated weights for policy 0, policy_version 223592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:13,975][24592] Fps is (10 sec: 45055.6, 60 sec: 43554.1, 300 sec: 43848.0). Total num frames: 1831673856. Throughput: 0: 11039.7. Samples: 207893796. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:13,976][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:15,463][626795] Updated weights for policy 0, policy_version 223602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:17,281][626795] Updated weights for policy 0, policy_version 223612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:18,972][626795] Updated weights for policy 0, policy_version 223622 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:18,976][24592] Fps is (10 sec: 46694.8, 60 sec: 43690.6, 300 sec: 43875.8). Total num frames: 1831911424. Throughput: 0: 11043.6. Samples: 207962364. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:18,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:20,782][626795] Updated weights for policy 0, policy_version 223632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:22,473][626795] Updated weights for policy 0, policy_version 223642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:23,975][24592] Fps is (10 sec: 45875.6, 60 sec: 44646.4, 300 sec: 44052.4). Total num frames: 1832132608. Throughput: 0: 10964.7. Samples: 208031496. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:23,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:24,408][626795] Updated weights for policy 0, policy_version 223652 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:26,198][626795] Updated weights for policy 0, policy_version 223662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:28,000][626795] Updated weights for policy 0, policy_version 223672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:28,976][24592] Fps is (10 sec: 45056.1, 60 sec: 44646.3, 300 sec: 44042.4). Total num frames: 1832361984. Throughput: 0: 10960.3. Samples: 208065780. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:28,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:31,026][626795] Updated weights for policy 0, policy_version 223682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:32,866][626795] Updated weights for policy 0, policy_version 223692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:33,976][24592] Fps is (10 sec: 39319.7, 60 sec: 43417.2, 300 sec: 43820.2). Total num frames: 1832525824. Throughput: 0: 10658.6. Samples: 208119642. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:33,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:34,755][626795] Updated weights for policy 0, policy_version 223702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:36,502][626795] Updated weights for policy 0, policy_version 223712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:38,312][626795] Updated weights for policy 0, policy_version 223722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:38,975][24592] Fps is (10 sec: 39321.9, 60 sec: 43144.5, 300 sec: 43820.3). Total num frames: 1832755200. Throughput: 0: 10638.4. Samples: 208186824. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:38,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:40,056][626795] Updated weights for policy 0, policy_version 223732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:41,846][626795] Updated weights for policy 0, policy_version 223742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:43,669][626795] Updated weights for policy 0, policy_version 223752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:43,975][24592] Fps is (10 sec: 45877.5, 60 sec: 43281.1, 300 sec: 43820.3). Total num frames: 1832984576. Throughput: 0: 10900.9. Samples: 208221600. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:43,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:45,395][626795] Updated weights for policy 0, policy_version 223762 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:47,170][626795] Updated weights for policy 0, policy_version 223772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:48,976][24592] Fps is (10 sec: 45874.8, 60 sec: 43281.2, 300 sec: 43848.0). Total num frames: 1833213952. Throughput: 0: 11072.2. Samples: 208290816. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:48,978][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:49,054][626795] Updated weights for policy 0, policy_version 223782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:50,759][626795] Updated weights for policy 0, policy_version 223792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:52,573][626795] Updated weights for policy 0, policy_version 223802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:53,976][24592] Fps is (10 sec: 45874.5, 60 sec: 43281.1, 300 sec: 43848.1). Total num frames: 1833443328. Throughput: 0: 11102.6. Samples: 208358952. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:53,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:54,382][626795] Updated weights for policy 0, policy_version 223812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:56,224][626795] Updated weights for policy 0, policy_version 223822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:57,959][626795] Updated weights for policy 0, policy_version 223832 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:58,975][24592] Fps is (10 sec: 45875.9, 60 sec: 44509.9, 300 sec: 44042.4). Total num frames: 1833672704. Throughput: 0: 11101.9. Samples: 208393380. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:43:58,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:43:59,859][626795] Updated weights for policy 0, policy_version 223842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:01,701][626795] Updated weights for policy 0, policy_version 223852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:04,132][24592] Fps is (10 sec: 39521.3, 60 sec: 43576.7, 300 sec: 43824.7). Total num frames: 1833844736. Throughput: 0: 11050.2. Samples: 208461354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:04,134][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:04,647][626795] Updated weights for policy 0, policy_version 223862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:06,470][626795] Updated weights for policy 0, policy_version 223872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:08,321][626795] Updated weights for policy 0, policy_version 223882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:08,982][24592] Fps is (10 sec: 39296.1, 60 sec: 43686.1, 300 sec: 43819.3). Total num frames: 1834065920. Throughput: 0: 10738.3. Samples: 208514790. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:08,983][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:10,020][626795] Updated weights for policy 0, policy_version 223892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:11,693][626795] Updated weights for policy 0, policy_version 223902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:13,527][626795] Updated weights for policy 0, policy_version 223912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:13,975][24592] Fps is (10 sec: 46606.6, 60 sec: 43827.3, 300 sec: 43848.0). Total num frames: 1834303488. Throughput: 0: 10790.6. Samples: 208551354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:13,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:15,474][626795] Updated weights for policy 0, policy_version 223922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:17,124][626795] Updated weights for policy 0, policy_version 223932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:18,975][24592] Fps is (10 sec: 45905.1, 60 sec: 43554.3, 300 sec: 43820.3). Total num frames: 1834524672. Throughput: 0: 11083.6. Samples: 208618398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:18,976][24592] Avg episode reward: [(0, '4.363')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:18,989][626795] Updated weights for policy 0, policy_version 223942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:20,684][626795] Updated weights for policy 0, policy_version 223952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:22,515][626795] Updated weights for policy 0, policy_version 223962 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:23,975][24592] Fps is (10 sec: 45055.2, 60 sec: 43690.6, 300 sec: 43820.2). Total num frames: 1834754048. Throughput: 0: 11127.2. Samples: 208687548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:23,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:24,321][626795] Updated weights for policy 0, policy_version 223972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:26,280][626795] Updated weights for policy 0, policy_version 223982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:27,779][626795] Updated weights for policy 0, policy_version 223992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:28,975][24592] Fps is (10 sec: 46694.2, 60 sec: 43827.3, 300 sec: 43875.8). Total num frames: 1834991616. Throughput: 0: 11100.8. Samples: 208721136. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:28,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:29,717][626795] Updated weights for policy 0, policy_version 224002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:31,528][626795] Updated weights for policy 0, policy_version 224012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:33,319][626795] Updated weights for policy 0, policy_version 224022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:33,976][24592] Fps is (10 sec: 45874.9, 60 sec: 44783.2, 300 sec: 44042.4). Total num frames: 1835212800. Throughput: 0: 11097.5. Samples: 208790202. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:33,978][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:35,168][626795] Updated weights for policy 0, policy_version 224032 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:38,121][626795] Updated weights for policy 0, policy_version 224042 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:38,975][24592] Fps is (10 sec: 39321.6, 60 sec: 43827.2, 300 sec: 43876.1). Total num frames: 1835384832. Throughput: 0: 10771.8. Samples: 208843680. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:38,977][24592] Avg episode reward: [(0, '4.274')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:40,043][626795] Updated weights for policy 0, policy_version 224052 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:41,826][626795] Updated weights for policy 0, policy_version 224062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:43,535][626795] Updated weights for policy 0, policy_version 224072 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:43,975][24592] Fps is (10 sec: 40960.9, 60 sec: 43963.8, 300 sec: 43875.8). Total num frames: 1835622400. Throughput: 0: 10752.0. Samples: 208877220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:43,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:45,252][626795] Updated weights for policy 0, policy_version 224082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:47,034][626795] Updated weights for policy 0, policy_version 224092 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:48,933][626795] Updated weights for policy 0, policy_version 224102 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:48,976][24592] Fps is (10 sec: 45872.7, 60 sec: 43826.9, 300 sec: 43848.5). Total num frames: 1835843584. Throughput: 0: 10833.1. Samples: 208947150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:48,978][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:50,788][626795] Updated weights for policy 0, policy_version 224112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:52,474][626795] Updated weights for policy 0, policy_version 224122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:53,976][24592] Fps is (10 sec: 45053.0, 60 sec: 43826.8, 300 sec: 43875.7). Total num frames: 1836072960. Throughput: 0: 11146.5. Samples: 209016318. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:53,978][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:54,274][626795] Updated weights for policy 0, policy_version 224132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:56,075][626795] Updated weights for policy 0, policy_version 224142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:57,935][626795] Updated weights for policy 0, policy_version 224152 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:58,975][24592] Fps is (10 sec: 45058.4, 60 sec: 43690.6, 300 sec: 43848.1). Total num frames: 1836294144. Throughput: 0: 11068.9. Samples: 209049456. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:44:58,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:44:59,827][626795] Updated weights for policy 0, policy_version 224162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:01,423][626795] Updated weights for policy 0, policy_version 224172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:03,228][626795] Updated weights for policy 0, policy_version 224182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:03,976][24592] Fps is (10 sec: 45876.3, 60 sec: 44900.0, 300 sec: 44094.3). Total num frames: 1836531712. Throughput: 0: 11116.8. Samples: 209118660. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:03,980][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:03,986][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000224186_1836531712.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000222899_1825988608.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:05,124][626795] Updated weights for policy 0, policy_version 224192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:06,976][626795] Updated weights for policy 0, policy_version 224202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:08,786][626795] Updated weights for policy 0, policy_version 224212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:08,975][24592] Fps is (10 sec: 45875.0, 60 sec: 44787.7, 300 sec: 44042.4). Total num frames: 1836752896. Throughput: 0: 11068.9. Samples: 209185650. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:08,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:11,808][626795] Updated weights for policy 0, policy_version 224222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:13,737][626795] Updated weights for policy 0, policy_version 224232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:13,976][24592] Fps is (10 sec: 38503.1, 60 sec: 43553.9, 300 sec: 43848.0). Total num frames: 1836916736. Throughput: 0: 10757.3. Samples: 209205216. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:13,978][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:15,606][626795] Updated weights for policy 0, policy_version 224242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:17,147][626795] Updated weights for policy 0, policy_version 224252 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:18,902][626795] Updated weights for policy 0, policy_version 224262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:18,975][24592] Fps is (10 sec: 40141.0, 60 sec: 43827.2, 300 sec: 43875.8). Total num frames: 1837154304. Throughput: 0: 10755.1. Samples: 209274180. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:18,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:20,728][626795] Updated weights for policy 0, policy_version 224272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:22,512][626795] Updated weights for policy 0, policy_version 224282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:23,975][24592] Fps is (10 sec: 45876.2, 60 sec: 43690.8, 300 sec: 43820.3). Total num frames: 1837375488. Throughput: 0: 11101.2. Samples: 209343234. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:23,976][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:24,335][626795] Updated weights for policy 0, policy_version 224292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:26,139][626795] Updated weights for policy 0, policy_version 224302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:27,794][626795] Updated weights for policy 0, policy_version 224312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:28,976][24592] Fps is (10 sec: 45055.2, 60 sec: 43554.0, 300 sec: 43848.0). Total num frames: 1837604864. Throughput: 0: 11114.1. Samples: 209377356. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:28,977][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:29,684][626795] Updated weights for policy 0, policy_version 224322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:31,610][626795] Updated weights for policy 0, policy_version 224332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:33,349][626795] Updated weights for policy 0, policy_version 224342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:33,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43690.8, 300 sec: 43848.1). Total num frames: 1837834240. Throughput: 0: 11068.3. Samples: 209445216. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:33,977][24592] Avg episode reward: [(0, '5.094')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:35,046][626795] Updated weights for policy 0, policy_version 224352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:36,908][626795] Updated weights for policy 0, policy_version 224362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:38,686][626795] Updated weights for policy 0, policy_version 224372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:38,975][24592] Fps is (10 sec: 45875.8, 60 sec: 44646.4, 300 sec: 44014.7). Total num frames: 1838063616. Throughput: 0: 11060.5. Samples: 209514036. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:38,976][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:40,549][626795] Updated weights for policy 0, policy_version 224382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:42,287][626795] Updated weights for policy 0, policy_version 224392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:44,835][24592] Fps is (10 sec: 40735.8, 60 sec: 43612.1, 300 sec: 43831.4). Total num frames: 1838276608. Throughput: 0: 10859.2. Samples: 209547450. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:44,836][24592] Avg episode reward: [(0, '4.995')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:45,401][626795] Updated weights for policy 0, policy_version 224402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:47,234][626795] Updated weights for policy 0, policy_version 224412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:48,975][24592] Fps is (10 sec: 39321.7, 60 sec: 43554.5, 300 sec: 43848.0). Total num frames: 1838456832. Throughput: 0: 10722.0. Samples: 209601144. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:48,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:49,130][626795] Updated weights for policy 0, policy_version 224422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:50,810][626795] Updated weights for policy 0, policy_version 224432 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:52,568][626795] Updated weights for policy 0, policy_version 224442 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:53,975][24592] Fps is (10 sec: 44810.9, 60 sec: 43554.6, 300 sec: 43848.0). Total num frames: 1838686208. Throughput: 0: 10766.1. Samples: 209670126. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:53,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:54,357][626795] Updated weights for policy 0, policy_version 224452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:56,197][626795] Updated weights for policy 0, policy_version 224462 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:58,070][626795] Updated weights for policy 0, policy_version 224472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:58,976][24592] Fps is (10 sec: 45874.6, 60 sec: 43690.6, 300 sec: 43848.0). Total num frames: 1838915584. Throughput: 0: 11075.2. Samples: 209703600. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:45:58,978][24592] Avg episode reward: [(0, '5.059')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:45:59,814][626795] Updated weights for policy 0, policy_version 224482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:01,593][626795] Updated weights for policy 0, policy_version 224492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:03,416][626795] Updated weights for policy 0, policy_version 224502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:03,976][24592] Fps is (10 sec: 45872.8, 60 sec: 43554.0, 300 sec: 43847.9). Total num frames: 1839144960. Throughput: 0: 11065.9. Samples: 209772150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:03,978][24592] Avg episode reward: [(0, '4.930')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:05,301][626795] Updated weights for policy 0, policy_version 224512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:07,101][626795] Updated weights for policy 0, policy_version 224522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:08,635][626795] Updated weights for policy 0, policy_version 224532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:08,975][24592] Fps is (10 sec: 45875.6, 60 sec: 43690.7, 300 sec: 43848.1). Total num frames: 1839374336. Throughput: 0: 11071.6. Samples: 209841456. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:08,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:10,570][626795] Updated weights for policy 0, policy_version 224542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:12,423][626795] Updated weights for policy 0, policy_version 224552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:13,975][24592] Fps is (10 sec: 45058.6, 60 sec: 44646.6, 300 sec: 44014.7). Total num frames: 1839595520. Throughput: 0: 11042.0. Samples: 209874246. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:13,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:14,280][626795] Updated weights for policy 0, policy_version 224562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:16,001][626795] Updated weights for policy 0, policy_version 224572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:18,975][24592] Fps is (10 sec: 39321.7, 60 sec: 43554.1, 300 sec: 43820.3). Total num frames: 1839767552. Throughput: 0: 10934.4. Samples: 209937264. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:18,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:19,163][626795] Updated weights for policy 0, policy_version 224582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:20,998][626795] Updated weights for policy 0, policy_version 224592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:22,773][626795] Updated weights for policy 0, policy_version 224602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:23,976][24592] Fps is (10 sec: 40137.3, 60 sec: 43690.0, 300 sec: 43820.1). Total num frames: 1839996928. Throughput: 0: 10723.8. Samples: 209996616. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:23,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:24,520][626795] Updated weights for policy 0, policy_version 224612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:26,310][626795] Updated weights for policy 0, policy_version 224622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:28,122][626795] Updated weights for policy 0, policy_version 224632 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:28,975][24592] Fps is (10 sec: 45056.1, 60 sec: 43554.3, 300 sec: 43820.3). Total num frames: 1840218112. Throughput: 0: 10947.5. Samples: 210030678. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:28,976][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:29,960][626795] Updated weights for policy 0, policy_version 224642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:31,776][626795] Updated weights for policy 0, policy_version 224652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:33,537][626795] Updated weights for policy 0, policy_version 224662 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:33,975][24592] Fps is (10 sec: 45060.1, 60 sec: 43554.1, 300 sec: 43820.3). Total num frames: 1840447488. Throughput: 0: 11042.5. Samples: 210098058. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:33,976][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:35,400][626795] Updated weights for policy 0, policy_version 224672 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:37,249][626795] Updated weights for policy 0, policy_version 224682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:38,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43417.6, 300 sec: 43792.5). Total num frames: 1840668672. Throughput: 0: 11014.4. Samples: 210165774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:38,978][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:39,000][626795] Updated weights for policy 0, policy_version 224692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:40,732][626795] Updated weights for policy 0, policy_version 224702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:42,590][626795] Updated weights for policy 0, policy_version 224712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:43,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44325.6, 300 sec: 43965.4). Total num frames: 1840898048. Throughput: 0: 11042.7. Samples: 210200520. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:43,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:44,437][626795] Updated weights for policy 0, policy_version 224722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:46,120][626795] Updated weights for policy 0, policy_version 224732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:48,052][626795] Updated weights for policy 0, policy_version 224742 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:48,975][24592] Fps is (10 sec: 46694.4, 60 sec: 44646.4, 300 sec: 44014.6). Total num frames: 1841135616. Throughput: 0: 11018.7. Samples: 210267984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:48,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:49,758][626795] Updated weights for policy 0, policy_version 224752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:52,988][626795] Updated weights for policy 0, policy_version 224762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:53,975][24592] Fps is (10 sec: 40140.8, 60 sec: 43554.2, 300 sec: 43820.3). Total num frames: 1841299456. Throughput: 0: 10664.3. Samples: 210321348. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:53,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:54,699][626795] Updated weights for policy 0, policy_version 224772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:56,619][626795] Updated weights for policy 0, policy_version 224782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:46:58,279][626795] Updated weights for policy 0, policy_version 224792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:58,976][24592] Fps is (10 sec: 39320.5, 60 sec: 43554.0, 300 sec: 43848.0). Total num frames: 1841528832. Throughput: 0: 10698.5. Samples: 210355680. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:46:58,977][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:00,039][626795] Updated weights for policy 0, policy_version 224802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:01,805][626795] Updated weights for policy 0, policy_version 224812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:03,585][626795] Updated weights for policy 0, policy_version 224822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:03,977][24592] Fps is (10 sec: 45050.3, 60 sec: 43417.1, 300 sec: 43792.3). Total num frames: 1841750016. Throughput: 0: 10828.6. Samples: 210424566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:03,979][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000224823_1841750016.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000223538_1831223296.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:05,550][626795] Updated weights for policy 0, policy_version 224832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:07,348][626795] Updated weights for policy 0, policy_version 224842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:08,975][24592] Fps is (10 sec: 45057.2, 60 sec: 43417.6, 300 sec: 43792.5). Total num frames: 1841979392. Throughput: 0: 11010.9. Samples: 210492096. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:08,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:09,125][626795] Updated weights for policy 0, policy_version 224852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:11,119][626795] Updated weights for policy 0, policy_version 224862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:12,743][626795] Updated weights for policy 0, policy_version 224872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:13,975][24592] Fps is (10 sec: 45881.1, 60 sec: 43554.2, 300 sec: 43792.5). Total num frames: 1842208768. Throughput: 0: 10988.5. Samples: 210525162. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:13,976][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:14,522][626795] Updated weights for policy 0, policy_version 224882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:16,321][626795] Updated weights for policy 0, policy_version 224892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:18,297][626795] Updated weights for policy 0, policy_version 224902 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,312][626772] Signal inference workers to stop experience collection... (2850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,312][626772] Signal inference workers to resume experience collection... (2850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,322][626795] InferenceWorker_p0-w0: stopping experience collection (2850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,330][626795] InferenceWorker_p0-w0: resuming experience collection (2850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44373.3, 300 sec: 43986.9). Total num frames: 1842429952. Throughput: 0: 11032.7. Samples: 210594528. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:18,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:19,878][626795] Updated weights for policy 0, policy_version 224912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:21,778][626795] Updated weights for policy 0, policy_version 224922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:23,511][626795] Updated weights for policy 0, policy_version 224932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:23,975][24592] Fps is (10 sec: 44236.6, 60 sec: 44237.5, 300 sec: 43959.1). Total num frames: 1842651136. Throughput: 0: 11038.8. Samples: 210662520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:23,977][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:26,747][626795] Updated weights for policy 0, policy_version 224942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:28,501][626795] Updated weights for policy 0, policy_version 224952 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:28,975][24592] Fps is (10 sec: 39321.7, 60 sec: 43417.6, 300 sec: 43736.9). Total num frames: 1842823168. Throughput: 0: 10678.4. Samples: 210681048. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:28,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:30,352][626795] Updated weights for policy 0, policy_version 224962 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:32,020][626795] Updated weights for policy 0, policy_version 224972 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:33,867][626795] Updated weights for policy 0, policy_version 224982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:33,975][24592] Fps is (10 sec: 40140.8, 60 sec: 43417.6, 300 sec: 43681.4). Total num frames: 1843052544. Throughput: 0: 10719.1. Samples: 210750342. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:33,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:35,657][626795] Updated weights for policy 0, policy_version 224992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:37,491][626795] Updated weights for policy 0, policy_version 225002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:38,975][24592] Fps is (10 sec: 45875.3, 60 sec: 43554.2, 300 sec: 43709.2). Total num frames: 1843281920. Throughput: 0: 11036.3. Samples: 210817980. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:38,976][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:39,288][626795] Updated weights for policy 0, policy_version 225012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:41,137][626795] Updated weights for policy 0, policy_version 225022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:42,956][626795] Updated weights for policy 0, policy_version 225032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:43,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43554.1, 300 sec: 43709.2). Total num frames: 1843511296. Throughput: 0: 11024.7. Samples: 210851790. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:43,976][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:44,723][626795] Updated weights for policy 0, policy_version 225042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:46,532][626795] Updated weights for policy 0, policy_version 225052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:48,255][626795] Updated weights for policy 0, policy_version 225062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:48,975][24592] Fps is (10 sec: 45055.7, 60 sec: 43281.1, 300 sec: 43681.4). Total num frames: 1843732480. Throughput: 0: 11012.3. Samples: 210920106. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:48,978][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:50,127][626795] Updated weights for policy 0, policy_version 225072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:51,921][626795] Updated weights for policy 0, policy_version 225082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:53,725][626795] Updated weights for policy 0, policy_version 225092 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:53,976][24592] Fps is (10 sec: 44235.0, 60 sec: 44236.5, 300 sec: 43903.5). Total num frames: 1843953664. Throughput: 0: 11018.6. Samples: 210987936. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:53,977][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:55,532][626795] Updated weights for policy 0, policy_version 225102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:47:57,265][626795] Updated weights for policy 0, policy_version 225112 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:59,122][24592] Fps is (10 sec: 40368.4, 60 sec: 43448.2, 300 sec: 43770.7). Total num frames: 1844142080. Throughput: 0: 11016.1. Samples: 211022502. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:47:59,124][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:00,365][626795] Updated weights for policy 0, policy_version 225122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:02,261][626795] Updated weights for policy 0, policy_version 225132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:03,916][626795] Updated weights for policy 0, policy_version 225142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:03,976][24592] Fps is (10 sec: 40958.2, 60 sec: 43554.5, 300 sec: 43792.4). Total num frames: 1844363264. Throughput: 0: 10683.3. Samples: 211075284. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:03,978][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:05,665][626795] Updated weights for policy 0, policy_version 225152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:07,437][626795] Updated weights for policy 0, policy_version 225162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:08,975][24592] Fps is (10 sec: 45726.3, 60 sec: 43554.1, 300 sec: 43792.5). Total num frames: 1844592640. Throughput: 0: 10746.4. Samples: 211146108. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:08,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:09,331][626795] Updated weights for policy 0, policy_version 225172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:11,085][626795] Updated weights for policy 0, policy_version 225182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:12,940][626795] Updated weights for policy 0, policy_version 225192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:13,976][24592] Fps is (10 sec: 45877.7, 60 sec: 43553.9, 300 sec: 43764.7). Total num frames: 1844822016. Throughput: 0: 11062.2. Samples: 211178850. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:13,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:14,688][626795] Updated weights for policy 0, policy_version 225202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:16,569][626795] Updated weights for policy 0, policy_version 225212 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:18,360][626795] Updated weights for policy 0, policy_version 225222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:18,976][24592] Fps is (10 sec: 45053.6, 60 sec: 43553.8, 300 sec: 43764.6). Total num frames: 1845043200. Throughput: 0: 11054.0. Samples: 211247778. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:18,978][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:20,037][626795] Updated weights for policy 0, policy_version 225232 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:21,883][626795] Updated weights for policy 0, policy_version 225242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:23,529][626795] Updated weights for policy 0, policy_version 225252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:23,976][24592] Fps is (10 sec: 45054.8, 60 sec: 43690.3, 300 sec: 43764.7). Total num frames: 1845272576. Throughput: 0: 11098.1. Samples: 211317402. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:23,978][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:25,455][626795] Updated weights for policy 0, policy_version 225262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:27,293][626795] Updated weights for policy 0, policy_version 225272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:28,975][24592] Fps is (10 sec: 45877.8, 60 sec: 44646.4, 300 sec: 43987.0). Total num frames: 1845501952. Throughput: 0: 11084.3. Samples: 211350582. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:28,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:29,013][626795] Updated weights for policy 0, policy_version 225282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:30,784][626795] Updated weights for policy 0, policy_version 225292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:33,957][626795] Updated weights for policy 0, policy_version 225302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:33,975][24592] Fps is (10 sec: 40142.9, 60 sec: 43690.6, 300 sec: 43792.5). Total num frames: 1845673984. Throughput: 0: 10790.9. Samples: 211405698. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:33,976][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:35,796][626795] Updated weights for policy 0, policy_version 225312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:37,336][626795] Updated weights for policy 0, policy_version 225322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:38,975][24592] Fps is (10 sec: 40960.0, 60 sec: 43827.2, 300 sec: 43820.3). Total num frames: 1845911552. Throughput: 0: 10823.2. Samples: 211474974. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:38,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:39,107][626795] Updated weights for policy 0, policy_version 225332 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:41,033][626795] Updated weights for policy 0, policy_version 225342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:42,831][626795] Updated weights for policy 0, policy_version 225352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:43,975][24592] Fps is (10 sec: 45875.4, 60 sec: 43690.7, 300 sec: 43792.5). Total num frames: 1846132736. Throughput: 0: 10841.3. Samples: 211508772. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:43,977][24592] Avg episode reward: [(0, '5.030')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:44,608][626795] Updated weights for policy 0, policy_version 225362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:46,397][626795] Updated weights for policy 0, policy_version 225372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:48,220][626795] Updated weights for policy 0, policy_version 225382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:48,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43827.2, 300 sec: 43792.5). Total num frames: 1846362112. Throughput: 0: 11141.4. Samples: 211576638. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:48,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:50,069][626795] Updated weights for policy 0, policy_version 225392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:51,871][626795] Updated weights for policy 0, policy_version 225402 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:53,572][626795] Updated weights for policy 0, policy_version 225412 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:53,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43827.5, 300 sec: 43764.7). Total num frames: 1846583296. Throughput: 0: 11081.1. Samples: 211644756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:53,976][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:55,359][626795] Updated weights for policy 0, policy_version 225422 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:57,233][626795] Updated weights for policy 0, policy_version 225432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:58,975][24592] Fps is (10 sec: 45055.7, 60 sec: 44618.8, 300 sec: 43982.5). Total num frames: 1846812672. Throughput: 0: 11114.2. Samples: 211678986. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:48:58,978][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:48:58,988][626795] Updated weights for policy 0, policy_version 225442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:00,859][626795] Updated weights for policy 0, policy_version 225452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:02,624][626795] Updated weights for policy 0, policy_version 225462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:03,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44510.5, 300 sec: 43960.1). Total num frames: 1847033856. Throughput: 0: 11106.4. Samples: 211747560. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:03,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:04,000][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000225469_1847042048.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000224186_1836531712.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:04,455][626795] Updated weights for policy 0, policy_version 225472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:07,616][626795] Updated weights for policy 0, policy_version 225482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:08,975][24592] Fps is (10 sec: 39321.8, 60 sec: 43554.1, 300 sec: 43736.9). Total num frames: 1847205888. Throughput: 0: 10718.8. Samples: 211799742. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:08,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:09,498][626795] Updated weights for policy 0, policy_version 225492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:11,214][626795] Updated weights for policy 0, policy_version 225502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:12,801][626795] Updated weights for policy 0, policy_version 225512 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 43690.8, 300 sec: 43792.5). Total num frames: 1847443456. Throughput: 0: 10758.4. Samples: 211834710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:13,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:14,694][626795] Updated weights for policy 0, policy_version 225522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:16,538][626795] Updated weights for policy 0, policy_version 225532 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:18,293][626795] Updated weights for policy 0, policy_version 225542 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:18,975][24592] Fps is (10 sec: 45874.8, 60 sec: 43691.0, 300 sec: 43764.7). Total num frames: 1847664640. Throughput: 0: 11050.9. Samples: 211902990. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:18,976][24592] Avg episode reward: [(0, '5.004')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:20,085][626795] Updated weights for policy 0, policy_version 225552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:21,905][626795] Updated weights for policy 0, policy_version 225562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:23,814][626795] Updated weights for policy 0, policy_version 225572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:23,975][24592] Fps is (10 sec: 45055.4, 60 sec: 43691.0, 300 sec: 43736.9). Total num frames: 1847894016. Throughput: 0: 11025.8. Samples: 211971138. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:23,978][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:25,573][626795] Updated weights for policy 0, policy_version 225582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:27,281][626795] Updated weights for policy 0, policy_version 225592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:28,975][24592] Fps is (10 sec: 45875.7, 60 sec: 43690.6, 300 sec: 43764.7). Total num frames: 1848123392. Throughput: 0: 11037.6. Samples: 212005464. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:28,977][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:29,075][626795] Updated weights for policy 0, policy_version 225602 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:30,911][626795] Updated weights for policy 0, policy_version 225612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:32,685][626795] Updated weights for policy 0, policy_version 225622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:33,975][24592] Fps is (10 sec: 45056.7, 60 sec: 44509.9, 300 sec: 43931.3). Total num frames: 1848344576. Throughput: 0: 11040.0. Samples: 212073438. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:33,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:34,601][626795] Updated weights for policy 0, policy_version 225632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:36,253][626795] Updated weights for policy 0, policy_version 225642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:38,112][626795] Updated weights for policy 0, policy_version 225652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:39,797][24592] Fps is (10 sec: 40120.7, 60 sec: 43504.4, 300 sec: 43726.2). Total num frames: 1848557568. Throughput: 0: 10105.6. Samples: 212107812. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:39,798][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:41,268][626795] Updated weights for policy 0, policy_version 225662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:43,017][626795] Updated weights for policy 0, policy_version 225672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:43,975][24592] Fps is (10 sec: 40140.7, 60 sec: 43554.1, 300 sec: 43737.0). Total num frames: 1848745984. Throughput: 0: 10721.0. Samples: 212161428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:43,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:44,767][626795] Updated weights for policy 0, policy_version 225682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:46,446][626795] Updated weights for policy 0, policy_version 225692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:48,295][626795] Updated weights for policy 0, policy_version 225702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:48,975][24592] Fps is (10 sec: 45519.8, 60 sec: 43554.1, 300 sec: 43737.0). Total num frames: 1848975360. Throughput: 0: 10744.7. Samples: 212231070. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:48,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:50,150][626795] Updated weights for policy 0, policy_version 225712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:51,841][626795] Updated weights for policy 0, policy_version 225722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:53,649][626795] Updated weights for policy 0, policy_version 225732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:53,976][24592] Fps is (10 sec: 45873.7, 60 sec: 43690.4, 300 sec: 43764.7). Total num frames: 1849204736. Throughput: 0: 11113.4. Samples: 212299848. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:53,980][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:55,625][626795] Updated weights for policy 0, policy_version 225742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:57,363][626795] Updated weights for policy 0, policy_version 225752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:58,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43554.1, 300 sec: 43709.2). Total num frames: 1849425920. Throughput: 0: 11059.6. Samples: 212332392. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:49:58,977][24592] Avg episode reward: [(0, '5.001')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:49:59,210][626795] Updated weights for policy 0, policy_version 225762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:00,940][626795] Updated weights for policy 0, policy_version 225772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:02,673][626795] Updated weights for policy 0, policy_version 225782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:03,975][24592] Fps is (10 sec: 45057.4, 60 sec: 43690.6, 300 sec: 43737.0). Total num frames: 1849655296. Throughput: 0: 11089.6. Samples: 212402022. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:03,976][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:04,566][626795] Updated weights for policy 0, policy_version 225792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:06,361][626795] Updated weights for policy 0, policy_version 225802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:08,140][626795] Updated weights for policy 0, policy_version 225812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:08,975][24592] Fps is (10 sec: 45875.3, 60 sec: 44646.4, 300 sec: 43959.1). Total num frames: 1849884672. Throughput: 0: 11076.7. Samples: 212469588. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:08,980][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:09,931][626795] Updated weights for policy 0, policy_version 225822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:11,688][626795] Updated weights for policy 0, policy_version 225832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:13,975][24592] Fps is (10 sec: 39321.3, 60 sec: 43417.5, 300 sec: 43709.2). Total num frames: 1850048512. Throughput: 0: 11062.9. Samples: 212503296. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:13,977][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:14,836][626795] Updated weights for policy 0, policy_version 225842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:16,582][626795] Updated weights for policy 0, policy_version 225852 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:18,294][626795] Updated weights for policy 0, policy_version 225862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:18,976][24592] Fps is (10 sec: 40140.0, 60 sec: 43690.6, 300 sec: 43764.7). Total num frames: 1850286080. Throughput: 0: 10785.0. Samples: 212558766. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:18,976][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:20,006][626795] Updated weights for policy 0, policy_version 225872 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:21,866][626795] Updated weights for policy 0, policy_version 225882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:23,697][626795] Updated weights for policy 0, policy_version 225892 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:23,975][24592] Fps is (10 sec: 46694.6, 60 sec: 43690.7, 300 sec: 43764.7). Total num frames: 1850515456. Throughput: 0: 11745.3. Samples: 212626698. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:23,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:25,568][626795] Updated weights for policy 0, policy_version 225902 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:27,253][626795] Updated weights for policy 0, policy_version 225912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:28,975][24592] Fps is (10 sec: 45876.1, 60 sec: 43690.6, 300 sec: 43764.7). Total num frames: 1850744832. Throughput: 0: 11107.7. Samples: 212661276. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:28,976][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:29,094][626795] Updated weights for policy 0, policy_version 225922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:30,957][626795] Updated weights for policy 0, policy_version 225932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:32,824][626795] Updated weights for policy 0, policy_version 225942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:33,976][24592] Fps is (10 sec: 45055.1, 60 sec: 43690.5, 300 sec: 43736.9). Total num frames: 1850966016. Throughput: 0: 11041.3. Samples: 212727930. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:33,977][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:34,566][626795] Updated weights for policy 0, policy_version 225952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:36,286][626795] Updated weights for policy 0, policy_version 225962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:38,203][626795] Updated weights for policy 0, policy_version 225972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:38,975][24592] Fps is (10 sec: 45056.2, 60 sec: 44574.2, 300 sec: 43920.4). Total num frames: 1851195392. Throughput: 0: 11040.5. Samples: 212796666. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:38,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:40,017][626795] Updated weights for policy 0, policy_version 225982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:41,817][626795] Updated weights for policy 0, policy_version 225992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:43,547][626795] Updated weights for policy 0, policy_version 226002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:43,976][24592] Fps is (10 sec: 45875.7, 60 sec: 44646.3, 300 sec: 43959.1). Total num frames: 1851424768. Throughput: 0: 11052.9. Samples: 212829774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:43,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:45,438][626795] Updated weights for policy 0, policy_version 226012 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:48,628][626795] Updated weights for policy 0, policy_version 226022 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:48,975][24592] Fps is (10 sec: 38502.3, 60 sec: 43417.6, 300 sec: 43709.2). Total num frames: 1851580416. Throughput: 0: 10683.7. Samples: 212882790. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:48,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:50,425][626795] Updated weights for policy 0, policy_version 226032 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:52,216][626795] Updated weights for policy 0, policy_version 226042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:53,975][24592] Fps is (10 sec: 38502.9, 60 sec: 43417.8, 300 sec: 43709.2). Total num frames: 1851809792. Throughput: 0: 10704.4. Samples: 212951286. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:53,978][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:54,014][626795] Updated weights for policy 0, policy_version 226052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:55,831][626795] Updated weights for policy 0, policy_version 226062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:57,602][626795] Updated weights for policy 0, policy_version 226072 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:58,976][24592] Fps is (10 sec: 45874.3, 60 sec: 43554.0, 300 sec: 43709.2). Total num frames: 1852039168. Throughput: 0: 10698.2. Samples: 212984718. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:50:58,976][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:50:59,536][626795] Updated weights for policy 0, policy_version 226082 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:01,205][626795] Updated weights for policy 0, policy_version 226092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:03,108][626795] Updated weights for policy 0, policy_version 226102 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:03,975][24592] Fps is (10 sec: 45055.7, 60 sec: 43417.6, 300 sec: 43681.4). Total num frames: 1852260352. Throughput: 0: 10975.9. Samples: 213052680. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:03,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000226106_1852260352.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:04,050][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000224823_1841750016.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:04,964][626795] Updated weights for policy 0, policy_version 226112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:06,728][626795] Updated weights for policy 0, policy_version 226122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:08,560][626795] Updated weights for policy 0, policy_version 226132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:08,975][24592] Fps is (10 sec: 45057.1, 60 sec: 43417.6, 300 sec: 43709.2). Total num frames: 1852489728. Throughput: 0: 10977.3. Samples: 213120678. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:08,976][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:10,315][626795] Updated weights for policy 0, policy_version 226142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:12,236][626795] Updated weights for policy 0, policy_version 226152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:13,975][24592] Fps is (10 sec: 45055.9, 60 sec: 44373.3, 300 sec: 43875.8). Total num frames: 1852710912. Throughput: 0: 10942.4. Samples: 213153684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:13,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:14,093][626795] Updated weights for policy 0, policy_version 226162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:15,748][626795] Updated weights for policy 0, policy_version 226172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:17,637][626795] Updated weights for policy 0, policy_version 226182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:18,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44237.0, 300 sec: 43875.9). Total num frames: 1852940288. Throughput: 0: 10985.3. Samples: 213222264. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:18,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:20,647][626795] Updated weights for policy 0, policy_version 226192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:22,520][626795] Updated weights for policy 0, policy_version 226202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:23,975][24592] Fps is (10 sec: 40141.3, 60 sec: 43281.1, 300 sec: 43709.2). Total num frames: 1853112320. Throughput: 0: 10650.7. Samples: 213275946. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:23,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:24,289][626795] Updated weights for policy 0, policy_version 226212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:26,114][626795] Updated weights for policy 0, policy_version 226222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:27,768][626795] Updated weights for policy 0, policy_version 226232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:28,975][24592] Fps is (10 sec: 40140.6, 60 sec: 43281.1, 300 sec: 43709.2). Total num frames: 1853341696. Throughput: 0: 10674.2. Samples: 213310110. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:28,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:29,709][626795] Updated weights for policy 0, policy_version 226242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:31,539][626795] Updated weights for policy 0, policy_version 226252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:33,240][626795] Updated weights for policy 0, policy_version 226262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:33,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43281.3, 300 sec: 43709.2). Total num frames: 1853562880. Throughput: 0: 10980.8. Samples: 213376926. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:33,977][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:35,233][626795] Updated weights for policy 0, policy_version 226272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:37,013][626795] Updated weights for policy 0, policy_version 226282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:38,809][626795] Updated weights for policy 0, policy_version 226292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:38,975][24592] Fps is (10 sec: 44236.8, 60 sec: 43144.5, 300 sec: 43681.4). Total num frames: 1853784064. Throughput: 0: 10966.0. Samples: 213444756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:38,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:40,645][626795] Updated weights for policy 0, policy_version 226302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:42,315][626795] Updated weights for policy 0, policy_version 226312 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:43,975][24592] Fps is (10 sec: 45055.5, 60 sec: 43144.6, 300 sec: 43653.6). Total num frames: 1854013440. Throughput: 0: 10991.4. Samples: 213479328. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:43,977][24592] Avg episode reward: [(0, '4.910')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:44,228][626795] Updated weights for policy 0, policy_version 226322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:46,090][626795] Updated weights for policy 0, policy_version 226332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:47,953][626795] Updated weights for policy 0, policy_version 226342 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:48,976][24592] Fps is (10 sec: 45054.1, 60 sec: 44236.5, 300 sec: 43848.0). Total num frames: 1854234624. Throughput: 0: 10949.1. Samples: 213545394. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:48,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:49,780][626795] Updated weights for policy 0, policy_version 226352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:51,587][626795] Updated weights for policy 0, policy_version 226362 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:53,975][24592] Fps is (10 sec: 39322.2, 60 sec: 43281.1, 300 sec: 43653.7). Total num frames: 1854406656. Throughput: 0: 10717.9. Samples: 213602982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:53,976][24592] Avg episode reward: [(0, '5.075')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:54,709][626795] Updated weights for policy 0, policy_version 226372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:56,496][626795] Updated weights for policy 0, policy_version 226382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:51:58,311][626795] Updated weights for policy 0, policy_version 226392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:58,975][24592] Fps is (10 sec: 39323.2, 60 sec: 43144.7, 300 sec: 43653.8). Total num frames: 1854627840. Throughput: 0: 10616.0. Samples: 213631404. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:51:58,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:00,108][626795] Updated weights for policy 0, policy_version 226402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:01,968][626795] Updated weights for policy 0, policy_version 226412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:03,682][626795] Updated weights for policy 0, policy_version 226422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:03,976][24592] Fps is (10 sec: 45053.8, 60 sec: 43280.8, 300 sec: 43653.6). Total num frames: 1854857216. Throughput: 0: 10627.4. Samples: 213700500. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:03,977][24592] Avg episode reward: [(0, '4.963')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:05,572][626795] Updated weights for policy 0, policy_version 226432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:07,248][626795] Updated weights for policy 0, policy_version 226442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:08,975][24592] Fps is (10 sec: 45875.6, 60 sec: 43281.1, 300 sec: 43653.6). Total num frames: 1855086592. Throughput: 0: 10954.9. Samples: 213768918. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:08,978][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:09,097][626795] Updated weights for policy 0, policy_version 226452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:11,004][626795] Updated weights for policy 0, policy_version 226462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:12,848][626795] Updated weights for policy 0, policy_version 226472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:13,975][24592] Fps is (10 sec: 45057.8, 60 sec: 43281.1, 300 sec: 43653.6). Total num frames: 1855307776. Throughput: 0: 10924.9. Samples: 213801732. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:13,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:14,584][626795] Updated weights for policy 0, policy_version 226482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:16,355][626795] Updated weights for policy 0, policy_version 226492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:18,290][626795] Updated weights for policy 0, policy_version 226502 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:18,975][24592] Fps is (10 sec: 44236.6, 60 sec: 43144.5, 300 sec: 43653.6). Total num frames: 1855528960. Throughput: 0: 10963.2. Samples: 213870270. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:18,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:20,014][626795] Updated weights for policy 0, policy_version 226512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:21,835][626795] Updated weights for policy 0, policy_version 226522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:23,597][626795] Updated weights for policy 0, policy_version 226532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:23,975][24592] Fps is (10 sec: 45056.0, 60 sec: 44100.2, 300 sec: 43848.0). Total num frames: 1855758336. Throughput: 0: 10973.3. Samples: 213938556. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:23,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:25,615][626795] Updated weights for policy 0, policy_version 226542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:28,565][626795] Updated weights for policy 0, policy_version 226552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:28,976][24592] Fps is (10 sec: 39320.3, 60 sec: 43007.8, 300 sec: 43625.8). Total num frames: 1855922176. Throughput: 0: 10810.7. Samples: 213965814. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:28,976][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:30,554][626795] Updated weights for policy 0, policy_version 226562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:32,424][626795] Updated weights for policy 0, policy_version 226572 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:33,975][24592] Fps is (10 sec: 38502.6, 60 sec: 43008.0, 300 sec: 43598.1). Total num frames: 1856143360. Throughput: 0: 10600.6. Samples: 214022418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:33,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:34,312][626795] Updated weights for policy 0, policy_version 226582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:36,009][626795] Updated weights for policy 0, policy_version 226592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:37,742][626795] Updated weights for policy 0, policy_version 226602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:38,975][24592] Fps is (10 sec: 45057.3, 60 sec: 43144.5, 300 sec: 43598.1). Total num frames: 1856372736. Throughput: 0: 10847.7. Samples: 214091130. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:38,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:39,643][626795] Updated weights for policy 0, policy_version 226612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:41,503][626795] Updated weights for policy 0, policy_version 226622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:43,226][626795] Updated weights for policy 0, policy_version 226632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:43,975][24592] Fps is (10 sec: 45875.0, 60 sec: 43144.6, 300 sec: 43625.9). Total num frames: 1856602112. Throughput: 0: 10970.0. Samples: 214125054. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:43,977][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:45,058][626795] Updated weights for policy 0, policy_version 226642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:46,889][626795] Updated weights for policy 0, policy_version 226652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:48,782][626795] Updated weights for policy 0, policy_version 226662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:48,975][24592] Fps is (10 sec: 45875.2, 60 sec: 43281.4, 300 sec: 43653.7). Total num frames: 1856831488. Throughput: 0: 10946.9. Samples: 214193106. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:48,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:50,527][626795] Updated weights for policy 0, policy_version 226672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:52,310][626795] Updated weights for policy 0, policy_version 226682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:53,975][24592] Fps is (10 sec: 44236.9, 60 sec: 43963.7, 300 sec: 43758.7). Total num frames: 1857044480. Throughput: 0: 10894.5. Samples: 214259172. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:53,977][24592] Avg episode reward: [(0, '4.361')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:54,271][626795] Updated weights for policy 0, policy_version 226692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:56,108][626795] Updated weights for policy 0, policy_version 226702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:52:57,901][626795] Updated weights for policy 0, policy_version 226712 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:58,976][24592] Fps is (10 sec: 44235.1, 60 sec: 44100.0, 300 sec: 43764.8). Total num frames: 1857273856. Throughput: 0: 10904.6. Samples: 214292442. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:52:58,977][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:00,680][626795] Updated weights for policy 0, policy_version 226722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:02,439][626795] Updated weights for policy 0, policy_version 226732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:03,975][24592] Fps is (10 sec: 40959.7, 60 sec: 43281.3, 300 sec: 43598.1). Total num frames: 1857454080. Throughput: 0: 10669.0. Samples: 214350378. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:03,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000226740_1857454080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:04,078][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000225469_1847042048.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:04,330][626795] Updated weights for policy 0, policy_version 226742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:06,184][626795] Updated weights for policy 0, policy_version 226752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:08,000][626795] Updated weights for policy 0, policy_version 226762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:08,976][24592] Fps is (10 sec: 39322.2, 60 sec: 43007.8, 300 sec: 43542.6). Total num frames: 1857667072. Throughput: 0: 10624.4. Samples: 214416654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:08,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:09,894][626795] Updated weights for policy 0, policy_version 226772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:11,749][626795] Updated weights for policy 0, policy_version 226782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:13,480][626795] Updated weights for policy 0, policy_version 226792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:13,975][24592] Fps is (10 sec: 45056.5, 60 sec: 43281.1, 300 sec: 43598.2). Total num frames: 1857904640. Throughput: 0: 10767.5. Samples: 214450350. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:13,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:15,223][626795] Updated weights for policy 0, policy_version 226802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:17,108][626795] Updated weights for policy 0, policy_version 226812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:18,927][626795] Updated weights for policy 0, policy_version 226822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:18,975][24592] Fps is (10 sec: 45876.4, 60 sec: 43281.1, 300 sec: 43570.4). Total num frames: 1858125824. Throughput: 0: 11009.3. Samples: 214517838. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:18,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:20,647][626795] Updated weights for policy 0, policy_version 226832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:22,480][626795] Updated weights for policy 0, policy_version 226842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,102][626772] Signal inference workers to stop experience collection... (2900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,103][626772] Signal inference workers to resume experience collection... (2900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,113][626795] InferenceWorker_p0-w0: stopping experience collection (2900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,119][626795] InferenceWorker_p0-w0: resuming experience collection (2900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43281.1, 300 sec: 43570.3). Total num frames: 1858355200. Throughput: 0: 10989.2. Samples: 214585644. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:23,978][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:24,425][626795] Updated weights for policy 0, policy_version 226852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:26,317][626795] Updated weights for policy 0, policy_version 226862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:27,877][626795] Updated weights for policy 0, policy_version 226872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:28,975][24592] Fps is (10 sec: 45056.1, 60 sec: 44237.1, 300 sec: 43737.0). Total num frames: 1858576384. Throughput: 0: 10975.4. Samples: 214618944. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:28,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:29,843][626795] Updated weights for policy 0, policy_version 226882 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:31,664][626795] Updated weights for policy 0, policy_version 226892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:33,975][24592] Fps is (10 sec: 39321.6, 60 sec: 43417.6, 300 sec: 43514.8). Total num frames: 1858748416. Throughput: 0: 10967.9. Samples: 214686660. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:33,976][24592] Avg episode reward: [(0, '4.975')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:34,843][626795] Updated weights for policy 0, policy_version 226902 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:36,532][626795] Updated weights for policy 0, policy_version 226912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:38,356][626795] Updated weights for policy 0, policy_version 226922 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:38,975][24592] Fps is (10 sec: 39321.4, 60 sec: 43281.1, 300 sec: 43514.8). Total num frames: 1858969600. Throughput: 0: 10689.6. Samples: 214740204. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:38,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:40,162][626795] Updated weights for policy 0, policy_version 226932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:42,058][626795] Updated weights for policy 0, policy_version 226942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:43,808][626795] Updated weights for policy 0, policy_version 226952 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:43,976][24592] Fps is (10 sec: 44236.2, 60 sec: 43144.5, 300 sec: 43487.0). Total num frames: 1859190784. Throughput: 0: 10701.1. Samples: 214773990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:43,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:45,606][626795] Updated weights for policy 0, policy_version 226962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:47,429][626795] Updated weights for policy 0, policy_version 226972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:48,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43144.5, 300 sec: 43514.8). Total num frames: 1859420160. Throughput: 0: 10922.2. Samples: 214841874. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:48,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:49,303][626795] Updated weights for policy 0, policy_version 226982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:51,043][626795] Updated weights for policy 0, policy_version 226992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:52,948][626795] Updated weights for policy 0, policy_version 227002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:53,975][24592] Fps is (10 sec: 45056.5, 60 sec: 43281.1, 300 sec: 43487.0). Total num frames: 1859641344. Throughput: 0: 10949.3. Samples: 214909368. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:53,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:54,806][626795] Updated weights for policy 0, policy_version 227012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:56,553][626795] Updated weights for policy 0, policy_version 227022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:53:58,369][626795] Updated weights for policy 0, policy_version 227032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:58,975][24592] Fps is (10 sec: 45055.6, 60 sec: 43281.3, 300 sec: 43514.8). Total num frames: 1859870720. Throughput: 0: 10930.5. Samples: 214942224. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:53:58,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:00,242][626795] Updated weights for policy 0, policy_version 227042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:02,120][626795] Updated weights for policy 0, policy_version 227052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:03,836][626795] Updated weights for policy 0, policy_version 227062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:03,975][24592] Fps is (10 sec: 45056.0, 60 sec: 43963.8, 300 sec: 43681.4). Total num frames: 1860091904. Throughput: 0: 10928.8. Samples: 215009634. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:03,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:05,726][626795] Updated weights for policy 0, policy_version 227072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:08,809][626795] Updated weights for policy 0, policy_version 227082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:08,975][24592] Fps is (10 sec: 39322.1, 60 sec: 43281.3, 300 sec: 43459.3). Total num frames: 1860263936. Throughput: 0: 10617.1. Samples: 215063412. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:08,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:10,618][626795] Updated weights for policy 0, policy_version 227092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:12,369][626795] Updated weights for policy 0, policy_version 227102 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:13,976][24592] Fps is (10 sec: 39320.1, 60 sec: 43007.7, 300 sec: 43459.2). Total num frames: 1860485120. Throughput: 0: 10624.8. Samples: 215097066. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:13,977][24592] Avg episode reward: [(0, '4.441')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:14,240][626795] Updated weights for policy 0, policy_version 227112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:16,083][626795] Updated weights for policy 0, policy_version 227122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:17,869][626795] Updated weights for policy 0, policy_version 227132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:18,975][24592] Fps is (10 sec: 45055.8, 60 sec: 43144.5, 300 sec: 43459.3). Total num frames: 1860714496. Throughput: 0: 10610.9. Samples: 215164152. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:18,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:19,810][626795] Updated weights for policy 0, policy_version 227142 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:21,738][626795] Updated weights for policy 0, policy_version 227152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:23,711][626795] Updated weights for policy 0, policy_version 227162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:23,975][24592] Fps is (10 sec: 43419.0, 60 sec: 42734.9, 300 sec: 43375.9). Total num frames: 1860919296. Throughput: 0: 10851.7. Samples: 215228532. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:23,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:25,663][626795] Updated weights for policy 0, policy_version 227172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:27,619][626795] Updated weights for policy 0, policy_version 227182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:28,976][24592] Fps is (10 sec: 40957.0, 60 sec: 42461.3, 300 sec: 43320.3). Total num frames: 1861124096. Throughput: 0: 10776.1. Samples: 215258922. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:28,979][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:29,521][626795] Updated weights for policy 0, policy_version 227192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:31,480][626795] Updated weights for policy 0, policy_version 227202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:33,386][626795] Updated weights for policy 0, policy_version 227212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 43144.5, 300 sec: 43441.4). Total num frames: 1861337088. Throughput: 0: 10700.5. Samples: 215323398. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:33,977][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:35,238][626795] Updated weights for policy 0, policy_version 227222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:37,034][626795] Updated weights for policy 0, policy_version 227232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:38,941][626795] Updated weights for policy 0, policy_version 227242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:38,975][24592] Fps is (10 sec: 44239.9, 60 sec: 43281.1, 300 sec: 43459.2). Total num frames: 1861566464. Throughput: 0: 10670.3. Samples: 215389530. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:38,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:40,708][626795] Updated weights for policy 0, policy_version 227252 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:42,533][626795] Updated weights for policy 0, policy_version 227262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:43,975][24592] Fps is (10 sec: 45055.9, 60 sec: 43281.1, 300 sec: 43431.5). Total num frames: 1861787648. Throughput: 0: 10682.7. Samples: 215422944. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:43,976][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:44,514][626795] Updated weights for policy 0, policy_version 227272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:46,248][626795] Updated weights for policy 0, policy_version 227282 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:48,250][626795] Updated weights for policy 0, policy_version 227292 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:48,975][24592] Fps is (10 sec: 44236.6, 60 sec: 43144.5, 300 sec: 43403.8). Total num frames: 1862008832. Throughput: 0: 10638.9. Samples: 215488386. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:48,976][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:50,113][626795] Updated weights for policy 0, policy_version 227302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:51,966][626795] Updated weights for policy 0, policy_version 227312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:53,686][626795] Updated weights for policy 0, policy_version 227322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:53,975][24592] Fps is (10 sec: 44237.1, 60 sec: 43144.6, 300 sec: 43403.7). Total num frames: 1862230016. Throughput: 0: 10923.1. Samples: 215554950. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:53,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:55,665][626795] Updated weights for policy 0, policy_version 227332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:57,521][626795] Updated weights for policy 0, policy_version 227342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:58,976][24592] Fps is (10 sec: 44236.2, 60 sec: 43007.9, 300 sec: 43375.9). Total num frames: 1862451200. Throughput: 0: 10893.6. Samples: 215587278. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:54:58,976][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:54:59,342][626795] Updated weights for policy 0, policy_version 227352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:01,234][626795] Updated weights for policy 0, policy_version 227362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:03,101][626795] Updated weights for policy 0, policy_version 227372 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:03,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42871.4, 300 sec: 43320.4). Total num frames: 1862664192. Throughput: 0: 10870.9. Samples: 215653344. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:03,978][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:03,986][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000227376_1862664192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000226106_1852260352.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:05,019][626795] Updated weights for policy 0, policy_version 227382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:06,842][626795] Updated weights for policy 0, policy_version 227392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:08,815][626795] Updated weights for policy 0, policy_version 227402 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:08,975][24592] Fps is (10 sec: 42599.2, 60 sec: 43554.1, 300 sec: 43487.0). Total num frames: 1862877184. Throughput: 0: 10879.3. Samples: 215718102. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:08,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:10,692][626795] Updated weights for policy 0, policy_version 227412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:13,229][626795] Updated weights for policy 0, policy_version 227422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:13,975][24592] Fps is (10 sec: 40140.7, 60 sec: 43008.2, 300 sec: 43320.4). Total num frames: 1863065600. Throughput: 0: 10934.7. Samples: 215750976. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:13,976][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:14,961][626795] Updated weights for policy 0, policy_version 227432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:16,842][626795] Updated weights for policy 0, policy_version 227442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:18,731][626795] Updated weights for policy 0, policy_version 227452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:18,975][24592] Fps is (10 sec: 41778.9, 60 sec: 43007.9, 300 sec: 43320.4). Total num frames: 1863294976. Throughput: 0: 10830.3. Samples: 215810760. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:18,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:20,535][626795] Updated weights for policy 0, policy_version 227462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:22,383][626795] Updated weights for policy 0, policy_version 227472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:23,976][24592] Fps is (10 sec: 45873.6, 60 sec: 43417.3, 300 sec: 43320.3). Total num frames: 1863524352. Throughput: 0: 10862.7. Samples: 215878356. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:23,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:24,140][626795] Updated weights for policy 0, policy_version 227482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:25,959][626795] Updated weights for policy 0, policy_version 227492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:27,836][626795] Updated weights for policy 0, policy_version 227502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:28,976][24592] Fps is (10 sec: 44236.3, 60 sec: 43554.5, 300 sec: 43292.6). Total num frames: 1863737344. Throughput: 0: 10849.4. Samples: 215911170. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:28,978][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:29,830][626795] Updated weights for policy 0, policy_version 227512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:31,680][626795] Updated weights for policy 0, policy_version 227522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:33,420][626795] Updated weights for policy 0, policy_version 227532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:33,976][24592] Fps is (10 sec: 43418.1, 60 sec: 43690.5, 300 sec: 43264.8). Total num frames: 1863958528. Throughput: 0: 10858.7. Samples: 215977032. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:33,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:35,340][626795] Updated weights for policy 0, policy_version 227542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:37,201][626795] Updated weights for policy 0, policy_version 227552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:38,986][24592] Fps is (10 sec: 44192.6, 60 sec: 43546.8, 300 sec: 43235.6). Total num frames: 1864179712. Throughput: 0: 10864.1. Samples: 216043944. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:38,986][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:39,019][626795] Updated weights for policy 0, policy_version 227562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:40,919][626795] Updated weights for policy 0, policy_version 227572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:42,926][626795] Updated weights for policy 0, policy_version 227582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:43,976][24592] Fps is (10 sec: 43415.6, 60 sec: 43417.1, 300 sec: 43431.4). Total num frames: 1864392704. Throughput: 0: 10846.3. Samples: 216075366. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:43,978][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:45,444][626795] Updated weights for policy 0, policy_version 227592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:47,466][626795] Updated weights for policy 0, policy_version 227602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:48,975][24592] Fps is (10 sec: 40181.5, 60 sec: 42871.5, 300 sec: 43292.6). Total num frames: 1864581120. Throughput: 0: 10637.7. Samples: 216132042. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:48,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:49,358][626795] Updated weights for policy 0, policy_version 227612 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:51,276][626795] Updated weights for policy 0, policy_version 227622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:53,099][626795] Updated weights for policy 0, policy_version 227632 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:53,975][24592] Fps is (10 sec: 40144.1, 60 sec: 42734.9, 300 sec: 43237.1). Total num frames: 1864794112. Throughput: 0: 10645.2. Samples: 216197136. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:53,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:54,863][626795] Updated weights for policy 0, policy_version 227642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:56,799][626795] Updated weights for policy 0, policy_version 227652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:55:58,689][626795] Updated weights for policy 0, policy_version 227662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:58,975][24592] Fps is (10 sec: 44237.1, 60 sec: 42871.6, 300 sec: 43264.9). Total num frames: 1865023488. Throughput: 0: 10654.6. Samples: 216230430. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:55:58,976][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:00,488][626795] Updated weights for policy 0, policy_version 227672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:02,304][626795] Updated weights for policy 0, policy_version 227682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:03,975][24592] Fps is (10 sec: 45055.9, 60 sec: 43008.0, 300 sec: 43237.1). Total num frames: 1865244672. Throughput: 0: 10801.8. Samples: 216296838. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:03,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:04,178][626795] Updated weights for policy 0, policy_version 227692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:06,148][626795] Updated weights for policy 0, policy_version 227702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:08,000][626795] Updated weights for policy 0, policy_version 227712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:08,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42871.5, 300 sec: 43181.6). Total num frames: 1865449472. Throughput: 0: 10729.4. Samples: 216361176. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:08,978][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:09,928][626795] Updated weights for policy 0, policy_version 227722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:11,772][626795] Updated weights for policy 0, policy_version 227732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:13,663][626795] Updated weights for policy 0, policy_version 227742 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:13,976][24592] Fps is (10 sec: 43416.7, 60 sec: 43554.1, 300 sec: 43181.5). Total num frames: 1865678848. Throughput: 0: 10757.1. Samples: 216395238. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:13,977][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:15,750][626795] Updated weights for policy 0, policy_version 227752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:17,548][626795] Updated weights for policy 0, policy_version 227762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:18,975][24592] Fps is (10 sec: 43417.6, 60 sec: 43144.6, 300 sec: 43292.6). Total num frames: 1865883648. Throughput: 0: 10694.2. Samples: 216458268. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:18,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:19,494][626795] Updated weights for policy 0, policy_version 227772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:21,461][626795] Updated weights for policy 0, policy_version 227782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:23,513][626795] Updated weights for policy 0, policy_version 227792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:23,976][24592] Fps is (10 sec: 40959.4, 60 sec: 42735.0, 300 sec: 43209.3). Total num frames: 1866088448. Throughput: 0: 10593.0. Samples: 216520524. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:23,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:25,502][626795] Updated weights for policy 0, policy_version 227802 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:27,348][626795] Updated weights for policy 0, policy_version 227812 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:28,976][24592] Fps is (10 sec: 41778.5, 60 sec: 42734.9, 300 sec: 43181.5). Total num frames: 1866301440. Throughput: 0: 10590.0. Samples: 216551910. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:28,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:29,213][626795] Updated weights for policy 0, policy_version 227822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:31,329][626795] Updated weights for policy 0, policy_version 227832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:33,259][626795] Updated weights for policy 0, policy_version 227842 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:33,975][24592] Fps is (10 sec: 41780.7, 60 sec: 42462.1, 300 sec: 43126.0). Total num frames: 1866506240. Throughput: 0: 10722.2. Samples: 216614538. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:33,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:35,135][626795] Updated weights for policy 0, policy_version 227852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:37,032][626795] Updated weights for policy 0, policy_version 227862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:38,759][626795] Updated weights for policy 0, policy_version 227872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:38,977][24592] Fps is (10 sec: 43413.3, 60 sec: 42604.8, 300 sec: 43125.9). Total num frames: 1866735616. Throughput: 0: 10749.4. Samples: 216680874. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:38,982][24592] Avg episode reward: [(0, '4.899')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:40,801][626795] Updated weights for policy 0, policy_version 227882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:42,619][626795] Updated weights for policy 0, policy_version 227892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:43,975][24592] Fps is (10 sec: 44236.6, 60 sec: 42598.9, 300 sec: 43098.3). Total num frames: 1866948608. Throughput: 0: 10718.7. Samples: 216712770. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:43,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:44,382][626795] Updated weights for policy 0, policy_version 227902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:46,544][626795] Updated weights for policy 0, policy_version 227912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:48,228][626795] Updated weights for policy 0, policy_version 227922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:48,975][24592] Fps is (10 sec: 42603.2, 60 sec: 43008.0, 300 sec: 43237.1). Total num frames: 1867161600. Throughput: 0: 10671.5. Samples: 216777054. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:48,977][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:50,250][626795] Updated weights for policy 0, policy_version 227932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:52,044][626795] Updated weights for policy 0, policy_version 227942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:53,976][24592] Fps is (10 sec: 42597.2, 60 sec: 43007.8, 300 sec: 43209.3). Total num frames: 1867374592. Throughput: 0: 10683.9. Samples: 216841956. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:53,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:54,098][626795] Updated weights for policy 0, policy_version 227952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:56,214][626795] Updated weights for policy 0, policy_version 227962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:58,010][626795] Updated weights for policy 0, policy_version 227972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:58,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42598.3, 300 sec: 43126.1). Total num frames: 1867579392. Throughput: 0: 10576.3. Samples: 216871170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:56:58,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:56:59,883][626795] Updated weights for policy 0, policy_version 227982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:01,917][626795] Updated weights for policy 0, policy_version 227992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:03,849][626795] Updated weights for policy 0, policy_version 228002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:03,975][24592] Fps is (10 sec: 41780.4, 60 sec: 42461.9, 300 sec: 43070.5). Total num frames: 1867792384. Throughput: 0: 10616.5. Samples: 216936012. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:03,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000228002_1867792384.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000226740_1857454080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:05,734][626795] Updated weights for policy 0, policy_version 228012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:07,574][626795] Updated weights for policy 0, policy_version 228022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:08,976][24592] Fps is (10 sec: 43417.0, 60 sec: 42734.8, 300 sec: 43070.5). Total num frames: 1868013568. Throughput: 0: 10692.6. Samples: 217001688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:08,978][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:09,479][626795] Updated weights for policy 0, policy_version 228032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:11,550][626795] Updated weights for policy 0, policy_version 228042 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:13,324][626795] Updated weights for policy 0, policy_version 228052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:13,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42462.0, 300 sec: 43042.7). Total num frames: 1868226560. Throughput: 0: 10668.2. Samples: 217031976. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:13,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:15,258][626795] Updated weights for policy 0, policy_version 228062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:17,078][626795] Updated weights for policy 0, policy_version 228072 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:18,975][24592] Fps is (10 sec: 41780.2, 60 sec: 42461.9, 300 sec: 42959.4). Total num frames: 1868431360. Throughput: 0: 10726.0. Samples: 217097208. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:18,978][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:19,154][626795] Updated weights for policy 0, policy_version 228082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:21,130][626795] Updated weights for policy 0, policy_version 228092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:22,891][626795] Updated weights for policy 0, policy_version 228102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:23,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42598.7, 300 sec: 43126.1). Total num frames: 1868644352. Throughput: 0: 10682.4. Samples: 217161570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:23,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:24,863][626795] Updated weights for policy 0, policy_version 228112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:26,993][626795] Updated weights for policy 0, policy_version 228122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:28,822][626795] Updated weights for policy 0, policy_version 228132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:28,976][24592] Fps is (10 sec: 42594.9, 60 sec: 42598.0, 300 sec: 43098.1). Total num frames: 1868857344. Throughput: 0: 10644.7. Samples: 217191792. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:28,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:30,735][626795] Updated weights for policy 0, policy_version 228142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:32,472][626795] Updated weights for policy 0, policy_version 228152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:33,976][24592] Fps is (10 sec: 43416.4, 60 sec: 42871.3, 300 sec: 43070.4). Total num frames: 1869078528. Throughput: 0: 10657.7. Samples: 217256652. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:33,978][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:34,668][626795] Updated weights for policy 0, policy_version 228162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:36,574][626795] Updated weights for policy 0, policy_version 228172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:38,459][626795] Updated weights for policy 0, policy_version 228182 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:38,975][24592] Fps is (10 sec: 43421.1, 60 sec: 42599.2, 300 sec: 43014.9). Total num frames: 1869291520. Throughput: 0: 10620.9. Samples: 217319892. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:38,977][24592] Avg episode reward: [(0, '4.916')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:40,357][626795] Updated weights for policy 0, policy_version 228192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:42,292][626795] Updated weights for policy 0, policy_version 228202 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:43,976][24592] Fps is (10 sec: 41776.2, 60 sec: 42461.2, 300 sec: 42931.5). Total num frames: 1869496320. Throughput: 0: 10676.9. Samples: 217351638. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:43,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:44,287][626795] Updated weights for policy 0, policy_version 228212 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:46,118][626795] Updated weights for policy 0, policy_version 228222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:48,027][626795] Updated weights for policy 0, policy_version 228232 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:48,976][24592] Fps is (10 sec: 42597.7, 60 sec: 42598.3, 300 sec: 42959.4). Total num frames: 1869717504. Throughput: 0: 10676.4. Samples: 217416450. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:48,977][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:49,912][626795] Updated weights for policy 0, policy_version 228242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:51,899][626795] Updated weights for policy 0, policy_version 228252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:53,788][626795] Updated weights for policy 0, policy_version 228262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:53,976][24592] Fps is (10 sec: 43420.2, 60 sec: 42598.3, 300 sec: 42903.9). Total num frames: 1869930496. Throughput: 0: 10623.0. Samples: 217479726. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:53,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:55,794][626795] Updated weights for policy 0, policy_version 228272 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:57,787][626795] Updated weights for policy 0, policy_version 228282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:58,976][24592] Fps is (10 sec: 40958.6, 60 sec: 42461.6, 300 sec: 42959.3). Total num frames: 1870127104. Throughput: 0: 10640.3. Samples: 217510794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:57:58,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:57:59,865][626795] Updated weights for policy 0, policy_version 228292 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:01,817][626795] Updated weights for policy 0, policy_version 228302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:03,693][626795] Updated weights for policy 0, policy_version 228312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:03,976][24592] Fps is (10 sec: 40960.5, 60 sec: 42461.7, 300 sec: 42959.4). Total num frames: 1870340096. Throughput: 0: 10568.3. Samples: 217572786. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:03,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:05,707][626795] Updated weights for policy 0, policy_version 228322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:07,695][626795] Updated weights for policy 0, policy_version 228332 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:08,975][24592] Fps is (10 sec: 41781.5, 60 sec: 42189.0, 300 sec: 42848.3). Total num frames: 1870544896. Throughput: 0: 10522.9. Samples: 217635102. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:08,976][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:09,587][626795] Updated weights for policy 0, policy_version 228342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:11,381][626795] Updated weights for policy 0, policy_version 228352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:13,339][626795] Updated weights for policy 0, policy_version 228362 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:13,975][24592] Fps is (10 sec: 42599.4, 60 sec: 42325.3, 300 sec: 42848.3). Total num frames: 1870766080. Throughput: 0: 10592.2. Samples: 217668432. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:13,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:15,303][626795] Updated weights for policy 0, policy_version 228372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:17,194][626795] Updated weights for policy 0, policy_version 228382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:18,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42461.9, 300 sec: 42792.8). Total num frames: 1870979072. Throughput: 0: 10568.5. Samples: 217732230. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:18,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:18,986][626795] Updated weights for policy 0, policy_version 228392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:20,868][626795] Updated weights for policy 0, policy_version 228402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:22,815][626795] Updated weights for policy 0, policy_version 228412 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:23,976][24592] Fps is (10 sec: 42594.2, 60 sec: 42461.1, 300 sec: 42764.9). Total num frames: 1871192064. Throughput: 0: 10594.2. Samples: 217796640. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:23,978][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:24,827][626795] Updated weights for policy 0, policy_version 228422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:26,639][626795] Updated weights for policy 0, policy_version 228432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:28,543][626795] Updated weights for policy 0, policy_version 228442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:28,976][24592] Fps is (10 sec: 43415.9, 60 sec: 42598.7, 300 sec: 42931.6). Total num frames: 1871413248. Throughput: 0: 10628.7. Samples: 217829922. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:28,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:30,472][626795] Updated weights for policy 0, policy_version 228452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:32,594][626795] Updated weights for policy 0, policy_version 228462 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:33,975][24592] Fps is (10 sec: 42602.8, 60 sec: 42325.5, 300 sec: 42876.1). Total num frames: 1871618048. Throughput: 0: 10565.4. Samples: 217891890. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:33,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:34,398][626795] Updated weights for policy 0, policy_version 228472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:36,330][626795] Updated weights for policy 0, policy_version 228482 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:38,312][626795] Updated weights for policy 0, policy_version 228492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:38,976][24592] Fps is (10 sec: 40960.2, 60 sec: 42188.6, 300 sec: 42820.5). Total num frames: 1871822848. Throughput: 0: 10577.2. Samples: 217955700. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:38,977][24592] Avg episode reward: [(0, '4.389')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:40,296][626795] Updated weights for policy 0, policy_version 228502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:42,140][626795] Updated weights for policy 0, policy_version 228512 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:43,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42462.6, 300 sec: 42792.8). Total num frames: 1872044032. Throughput: 0: 10600.9. Samples: 217987830. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:43,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:44,014][626795] Updated weights for policy 0, policy_version 228522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:45,962][626795] Updated weights for policy 0, policy_version 228532 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:48,025][626795] Updated weights for policy 0, policy_version 228542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:48,975][24592] Fps is (10 sec: 43419.0, 60 sec: 42325.4, 300 sec: 42765.0). Total num frames: 1872257024. Throughput: 0: 10635.0. Samples: 218051358. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:48,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:49,803][626795] Updated weights for policy 0, policy_version 228552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:51,741][626795] Updated weights for policy 0, policy_version 228562 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:53,607][626795] Updated weights for policy 0, policy_version 228572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:53,976][24592] Fps is (10 sec: 42597.6, 60 sec: 42325.5, 300 sec: 42709.5). Total num frames: 1872470016. Throughput: 0: 10690.4. Samples: 218116170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:53,977][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:55,688][626795] Updated weights for policy 0, policy_version 228582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:57,456][626795] Updated weights for policy 0, policy_version 228592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:58,976][24592] Fps is (10 sec: 42596.7, 60 sec: 42598.5, 300 sec: 42681.6). Total num frames: 1872683008. Throughput: 0: 10642.7. Samples: 218147358. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:58:58,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:58:59,460][626795] Updated weights for policy 0, policy_version 228602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:01,340][626795] Updated weights for policy 0, policy_version 228612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:03,277][626795] Updated weights for policy 0, policy_version 228622 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:03,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42598.5, 300 sec: 42820.5). Total num frames: 1872896000. Throughput: 0: 10650.4. Samples: 218211498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:03,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000228625_1872896000.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:04,081][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000227376_1862664192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:05,292][626795] Updated weights for policy 0, policy_version 228632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:07,253][626795] Updated weights for policy 0, policy_version 228642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:08,975][24592] Fps is (10 sec: 42600.2, 60 sec: 42734.9, 300 sec: 42792.8). Total num frames: 1873108992. Throughput: 0: 10632.2. Samples: 218275080. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:08,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:09,077][626795] Updated weights for policy 0, policy_version 228652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:10,962][626795] Updated weights for policy 0, policy_version 228662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:12,965][626795] Updated weights for policy 0, policy_version 228672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:13,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42598.4, 300 sec: 42737.2). Total num frames: 1873321984. Throughput: 0: 10601.7. Samples: 218306994. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:13,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:14,808][626795] Updated weights for policy 0, policy_version 228682 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:16,755][626795] Updated weights for policy 0, policy_version 228692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:18,668][626795] Updated weights for policy 0, policy_version 228702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:18,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42734.9, 300 sec: 42792.8). Total num frames: 1873543168. Throughput: 0: 10670.0. Samples: 218372040. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:18,980][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:20,618][626795] Updated weights for policy 0, policy_version 228712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:22,443][626795] Updated weights for policy 0, policy_version 228722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:23,975][24592] Fps is (10 sec: 43417.8, 60 sec: 42735.7, 300 sec: 42820.7). Total num frames: 1873756160. Throughput: 0: 10682.8. Samples: 218436420. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:23,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:24,366][626795] Updated weights for policy 0, policy_version 228732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:26,259][626795] Updated weights for policy 0, policy_version 228742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:28,205][626795] Updated weights for policy 0, policy_version 228752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:28,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42462.2, 300 sec: 42792.8). Total num frames: 1873960960. Throughput: 0: 10660.3. Samples: 218467542. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:28,978][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:30,121][626795] Updated weights for policy 0, policy_version 228762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:32,104][626795] Updated weights for policy 0, policy_version 228772 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:33,976][24592] Fps is (10 sec: 41778.3, 60 sec: 42598.3, 300 sec: 42737.2). Total num frames: 1874173952. Throughput: 0: 10678.4. Samples: 218531886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:33,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:34,063][626795] Updated weights for policy 0, policy_version 228782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:35,956][626795] Updated weights for policy 0, policy_version 228792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:37,912][626795] Updated weights for policy 0, policy_version 228802 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:38,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42735.2, 300 sec: 42709.5). Total num frames: 1874386944. Throughput: 0: 10651.2. Samples: 218595474. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:38,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:39,852][626795] Updated weights for policy 0, policy_version 228812 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:41,725][626795] Updated weights for policy 0, policy_version 228822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:43,749][626795] Updated weights for policy 0, policy_version 228832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:43,975][24592] Fps is (10 sec: 42599.0, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1874599936. Throughput: 0: 10660.4. Samples: 218627070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:43,976][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:45,577][626795] Updated weights for policy 0, policy_version 228842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:47,549][626795] Updated weights for policy 0, policy_version 228852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:48,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42598.2, 300 sec: 42653.9). Total num frames: 1874812928. Throughput: 0: 10665.3. Samples: 218691438. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:48,978][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:49,463][626795] Updated weights for policy 0, policy_version 228862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:51,340][626795] Updated weights for policy 0, policy_version 228872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:53,318][626795] Updated weights for policy 0, policy_version 228882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:53,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42598.5, 300 sec: 42626.2). Total num frames: 1875025920. Throughput: 0: 10664.3. Samples: 218754972. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:53,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:55,261][626795] Updated weights for policy 0, policy_version 228892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:57,093][626795] Updated weights for policy 0, policy_version 228902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:58,975][24592] Fps is (10 sec: 42599.5, 60 sec: 42598.7, 300 sec: 42626.2). Total num frames: 1875238912. Throughput: 0: 10669.6. Samples: 218787126. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 13:59:58,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 13:59:59,006][626795] Updated weights for policy 0, policy_version 228912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:00,980][626795] Updated weights for policy 0, policy_version 228922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:02,841][626795] Updated weights for policy 0, policy_version 228932 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:03,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.5, 300 sec: 42626.2). Total num frames: 1875451904. Throughput: 0: 10649.9. Samples: 218851284. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:03,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:04,857][626795] Updated weights for policy 0, policy_version 228942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:06,673][626795] Updated weights for policy 0, policy_version 228952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:08,644][626795] Updated weights for policy 0, policy_version 228962 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:08,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42734.9, 300 sec: 42737.3). Total num frames: 1875673088. Throughput: 0: 10642.6. Samples: 218915340. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:08,978][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:10,599][626795] Updated weights for policy 0, policy_version 228972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:12,533][626795] Updated weights for policy 0, policy_version 228982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:13,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42598.3, 300 sec: 42653.9). Total num frames: 1875877888. Throughput: 0: 10646.2. Samples: 218946624. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:13,977][24592] Avg episode reward: [(0, '5.008')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:14,343][626795] Updated weights for policy 0, policy_version 228992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:16,356][626795] Updated weights for policy 0, policy_version 229002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:18,268][626795] Updated weights for policy 0, policy_version 229012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:18,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42461.8, 300 sec: 42598.4). Total num frames: 1876090880. Throughput: 0: 10641.3. Samples: 219010746. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:18,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:19,877][626772] Signal inference workers to stop experience collection... (2950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:19,888][626772] Signal inference workers to resume experience collection... (2950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:19,902][626795] InferenceWorker_p0-w0: stopping experience collection (2950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:19,908][626795] InferenceWorker_p0-w0: resuming experience collection (2950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:20,165][626795] Updated weights for policy 0, policy_version 229022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:22,190][626795] Updated weights for policy 0, policy_version 229032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:23,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42461.8, 300 sec: 42598.4). Total num frames: 1876303872. Throughput: 0: 10639.1. Samples: 219074232. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:23,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:23,988][626795] Updated weights for policy 0, policy_version 229042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:25,888][626795] Updated weights for policy 0, policy_version 229052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:27,908][626795] Updated weights for policy 0, policy_version 229062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:28,975][24592] Fps is (10 sec: 42599.4, 60 sec: 42598.4, 300 sec: 42570.7). Total num frames: 1876516864. Throughput: 0: 10660.6. Samples: 219106794. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:28,977][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:29,753][626795] Updated weights for policy 0, policy_version 229072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:31,747][626795] Updated weights for policy 0, policy_version 229082 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:33,626][626795] Updated weights for policy 0, policy_version 229092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:33,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42598.5, 300 sec: 42544.3). Total num frames: 1876729856. Throughput: 0: 10637.4. Samples: 219170118. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:33,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:35,545][626795] Updated weights for policy 0, policy_version 229102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:37,420][626795] Updated weights for policy 0, policy_version 229112 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:38,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.4, 300 sec: 42543.0). Total num frames: 1876942848. Throughput: 0: 10658.9. Samples: 219234624. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:38,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:39,433][626795] Updated weights for policy 0, policy_version 229122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:41,384][626795] Updated weights for policy 0, policy_version 229132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:43,250][626795] Updated weights for policy 0, policy_version 229142 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:43,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42598.2, 300 sec: 42626.1). Total num frames: 1877155840. Throughput: 0: 10656.1. Samples: 219266652. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:43,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:45,211][626795] Updated weights for policy 0, policy_version 229152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:47,206][626795] Updated weights for policy 0, policy_version 229162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.6, 300 sec: 42626.2). Total num frames: 1877368832. Throughput: 0: 10639.9. Samples: 219330078. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:48,977][24592] Avg episode reward: [(0, '4.954')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:49,052][626795] Updated weights for policy 0, policy_version 229172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:51,100][626795] Updated weights for policy 0, policy_version 229182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:52,895][626795] Updated weights for policy 0, policy_version 229192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:53,991][24592] Fps is (10 sec: 41715.3, 60 sec: 42450.7, 300 sec: 42540.6). Total num frames: 1877573632. Throughput: 0: 10612.2. Samples: 219393054. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:53,992][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:54,902][626795] Updated weights for policy 0, policy_version 229202 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:56,768][626795] Updated weights for policy 0, policy_version 229212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:00:58,793][626795] Updated weights for policy 0, policy_version 229222 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:58,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42598.2, 300 sec: 42542.8). Total num frames: 1877794816. Throughput: 0: 10628.7. Samples: 219424920. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:00:58,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:00,574][626795] Updated weights for policy 0, policy_version 229232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:02,451][626795] Updated weights for policy 0, policy_version 229242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:03,975][24592] Fps is (10 sec: 43485.7, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1878007808. Throughput: 0: 10635.9. Samples: 219489360. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:03,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:04,042][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000229250_1878016000.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000228002_1867792384.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:04,521][626795] Updated weights for policy 0, policy_version 229252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:06,399][626795] Updated weights for policy 0, policy_version 229262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:08,250][626795] Updated weights for policy 0, policy_version 229272 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:08,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1878220800. Throughput: 0: 10643.6. Samples: 219553194. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:08,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:10,308][626795] Updated weights for policy 0, policy_version 229282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:12,195][626795] Updated weights for policy 0, policy_version 229292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:13,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42598.2, 300 sec: 42542.8). Total num frames: 1878433792. Throughput: 0: 10634.4. Samples: 219585348. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:13,978][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:14,102][626795] Updated weights for policy 0, policy_version 229302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:15,942][626795] Updated weights for policy 0, policy_version 229312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:17,926][626795] Updated weights for policy 0, policy_version 229322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.5, 300 sec: 42570.7). Total num frames: 1878646784. Throughput: 0: 10644.8. Samples: 219649134. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:18,976][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:19,868][626795] Updated weights for policy 0, policy_version 229332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:21,721][626795] Updated weights for policy 0, policy_version 229342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:23,611][626795] Updated weights for policy 0, policy_version 229352 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:23,975][24592] Fps is (10 sec: 43419.1, 60 sec: 42734.9, 300 sec: 42598.4). Total num frames: 1878867968. Throughput: 0: 10645.1. Samples: 219713652. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:23,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:25,614][626795] Updated weights for policy 0, policy_version 229362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:27,631][626795] Updated weights for policy 0, policy_version 229372 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:28,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42598.3, 300 sec: 42598.4). Total num frames: 1879072768. Throughput: 0: 10622.2. Samples: 219744648. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:28,978][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:29,407][626795] Updated weights for policy 0, policy_version 229382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:31,316][626795] Updated weights for policy 0, policy_version 229392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:33,242][626795] Updated weights for policy 0, policy_version 229402 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:33,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42735.0, 300 sec: 42570.8). Total num frames: 1879293952. Throughput: 0: 10652.8. Samples: 219809454. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:33,978][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:35,252][626795] Updated weights for policy 0, policy_version 229412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:37,105][626795] Updated weights for policy 0, policy_version 229422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:38,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1879498752. Throughput: 0: 10682.5. Samples: 219873600. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:38,976][24592] Avg episode reward: [(0, '4.311')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:39,068][626795] Updated weights for policy 0, policy_version 229432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:40,922][626795] Updated weights for policy 0, policy_version 229442 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:42,949][626795] Updated weights for policy 0, policy_version 229452 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:43,976][24592] Fps is (10 sec: 41777.2, 60 sec: 42598.3, 300 sec: 42542.8). Total num frames: 1879711744. Throughput: 0: 10689.0. Samples: 219905928. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:43,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:44,810][626795] Updated weights for policy 0, policy_version 229462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:46,737][626795] Updated weights for policy 0, policy_version 229472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:48,631][626795] Updated weights for policy 0, policy_version 229482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:48,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1879924736. Throughput: 0: 10666.5. Samples: 219969354. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:48,976][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:50,546][626795] Updated weights for policy 0, policy_version 229492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:52,446][626795] Updated weights for policy 0, policy_version 229502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:53,975][24592] Fps is (10 sec: 43419.8, 60 sec: 42882.7, 300 sec: 42598.4). Total num frames: 1880145920. Throughput: 0: 10682.8. Samples: 220033920. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:53,976][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:54,449][626795] Updated weights for policy 0, policy_version 229512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:56,225][626795] Updated weights for policy 0, policy_version 229522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:01:58,160][626795] Updated weights for policy 0, policy_version 229532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:58,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42598.6, 300 sec: 42570.6). Total num frames: 1880350720. Throughput: 0: 10672.5. Samples: 220065606. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:01:58,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:00,110][626795] Updated weights for policy 0, policy_version 229542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:02,015][626795] Updated weights for policy 0, policy_version 229552 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1880563712. Throughput: 0: 10672.1. Samples: 220129380. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:03,976][24592] Avg episode reward: [(0, '4.904')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:03,993][626795] Updated weights for policy 0, policy_version 229562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:05,872][626795] Updated weights for policy 0, policy_version 229572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:07,862][626795] Updated weights for policy 0, policy_version 229582 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:08,975][24592] Fps is (10 sec: 42599.0, 60 sec: 42598.5, 300 sec: 42542.9). Total num frames: 1880776704. Throughput: 0: 10659.9. Samples: 220193346. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:08,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:09,811][626795] Updated weights for policy 0, policy_version 229592 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:11,730][626795] Updated weights for policy 0, policy_version 229602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:13,644][626795] Updated weights for policy 0, policy_version 229612 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:13,976][24592] Fps is (10 sec: 42594.7, 60 sec: 42598.0, 300 sec: 42570.5). Total num frames: 1880989696. Throughput: 0: 10678.0. Samples: 220225164. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:13,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:15,528][626795] Updated weights for policy 0, policy_version 229622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:17,540][626795] Updated weights for policy 0, policy_version 229632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:18,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1881202688. Throughput: 0: 10658.1. Samples: 220289070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:18,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:19,376][626795] Updated weights for policy 0, policy_version 229642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:21,246][626795] Updated weights for policy 0, policy_version 229652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:23,176][626795] Updated weights for policy 0, policy_version 229662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:23,975][24592] Fps is (10 sec: 43420.9, 60 sec: 42598.3, 300 sec: 42598.5). Total num frames: 1881423872. Throughput: 0: 10664.5. Samples: 220353504. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:23,978][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:25,164][626795] Updated weights for policy 0, policy_version 229672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:26,951][626795] Updated weights for policy 0, policy_version 229682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:28,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.5, 300 sec: 42542.9). Total num frames: 1881628672. Throughput: 0: 10661.9. Samples: 220385706. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:28,979][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:29,005][626795] Updated weights for policy 0, policy_version 229692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:30,843][626795] Updated weights for policy 0, policy_version 229702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:32,818][626795] Updated weights for policy 0, policy_version 229712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:33,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1881849856. Throughput: 0: 10676.8. Samples: 220449810. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:33,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:34,579][626795] Updated weights for policy 0, policy_version 229722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:36,629][626795] Updated weights for policy 0, policy_version 229732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:38,437][626795] Updated weights for policy 0, policy_version 229742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:38,975][24592] Fps is (10 sec: 43417.2, 60 sec: 42734.9, 300 sec: 42598.5). Total num frames: 1882062848. Throughput: 0: 10674.9. Samples: 220514292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:38,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:40,468][626795] Updated weights for policy 0, policy_version 229752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:42,206][626795] Updated weights for policy 0, policy_version 229762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:43,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42735.2, 300 sec: 42570.6). Total num frames: 1882275840. Throughput: 0: 10676.0. Samples: 220546026. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:43,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:44,207][626795] Updated weights for policy 0, policy_version 229772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:46,105][626795] Updated weights for policy 0, policy_version 229782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:48,075][626795] Updated weights for policy 0, policy_version 229792 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:48,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42734.9, 300 sec: 42570.7). Total num frames: 1882488832. Throughput: 0: 10693.6. Samples: 220610592. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:48,976][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:49,916][626795] Updated weights for policy 0, policy_version 229802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:51,890][626795] Updated weights for policy 0, policy_version 229812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:53,774][626795] Updated weights for policy 0, policy_version 229822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:53,977][24592] Fps is (10 sec: 43412.9, 60 sec: 42734.1, 300 sec: 42653.8). Total num frames: 1882710016. Throughput: 0: 10705.6. Samples: 220675110. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:53,978][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:55,705][626795] Updated weights for policy 0, policy_version 229832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:57,562][626795] Updated weights for policy 0, policy_version 229842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:58,976][24592] Fps is (10 sec: 42597.6, 60 sec: 42734.9, 300 sec: 42626.2). Total num frames: 1882914816. Throughput: 0: 10705.1. Samples: 220706886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:02:58,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:02:59,525][626795] Updated weights for policy 0, policy_version 229852 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:01,388][626795] Updated weights for policy 0, policy_version 229862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:03,387][626795] Updated weights for policy 0, policy_version 229872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:03,975][24592] Fps is (10 sec: 42603.1, 60 sec: 42871.4, 300 sec: 42681.7). Total num frames: 1883136000. Throughput: 0: 10710.3. Samples: 220771032. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:03,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000229875_1883136000.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000228625_1872896000.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:05,327][626795] Updated weights for policy 0, policy_version 229882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:07,166][626795] Updated weights for policy 0, policy_version 229892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:08,975][24592] Fps is (10 sec: 43418.6, 60 sec: 42871.5, 300 sec: 42653.9). Total num frames: 1883348992. Throughput: 0: 10698.0. Samples: 220834914. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:08,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:09,150][626795] Updated weights for policy 0, policy_version 229902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:11,012][626795] Updated weights for policy 0, policy_version 229912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:12,960][626795] Updated weights for policy 0, policy_version 229922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42735.5, 300 sec: 42626.2). Total num frames: 1883553792. Throughput: 0: 10678.6. Samples: 220866246. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:13,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:14,945][626795] Updated weights for policy 0, policy_version 229932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:16,780][626795] Updated weights for policy 0, policy_version 229942 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:18,650][626795] Updated weights for policy 0, policy_version 229952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42735.0, 300 sec: 42626.3). Total num frames: 1883766784. Throughput: 0: 10692.0. Samples: 220930950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:18,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:20,666][626795] Updated weights for policy 0, policy_version 229962 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:22,607][626795] Updated weights for policy 0, policy_version 229972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:23,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42598.1, 300 sec: 42598.4). Total num frames: 1883979776. Throughput: 0: 10672.7. Samples: 220994568. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:23,978][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:24,564][626795] Updated weights for policy 0, policy_version 229982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:26,534][626795] Updated weights for policy 0, policy_version 229992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:28,377][626795] Updated weights for policy 0, policy_version 230002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:28,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42871.4, 300 sec: 42653.9). Total num frames: 1884200960. Throughput: 0: 10673.1. Samples: 221026314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:28,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:30,317][626795] Updated weights for policy 0, policy_version 230012 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:32,149][626795] Updated weights for policy 0, policy_version 230022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:33,975][24592] Fps is (10 sec: 43419.5, 60 sec: 42734.9, 300 sec: 42681.8). Total num frames: 1884413952. Throughput: 0: 10663.3. Samples: 221090442. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:33,976][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:34,187][626795] Updated weights for policy 0, policy_version 230032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:35,985][626795] Updated weights for policy 0, policy_version 230042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:37,874][626795] Updated weights for policy 0, policy_version 230052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:38,976][24592] Fps is (10 sec: 42594.9, 60 sec: 42734.4, 300 sec: 42653.8). Total num frames: 1884626944. Throughput: 0: 10659.0. Samples: 221154762. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:38,977][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:39,826][626795] Updated weights for policy 0, policy_version 230062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:41,735][626795] Updated weights for policy 0, policy_version 230072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:43,639][626795] Updated weights for policy 0, policy_version 230082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42735.0, 300 sec: 42653.9). Total num frames: 1884839936. Throughput: 0: 10674.5. Samples: 221187234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:43,976][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:45,635][626795] Updated weights for policy 0, policy_version 230092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:47,476][626795] Updated weights for policy 0, policy_version 230102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:48,975][24592] Fps is (10 sec: 42602.1, 60 sec: 42734.9, 300 sec: 42654.0). Total num frames: 1885052928. Throughput: 0: 10670.1. Samples: 221251188. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:48,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:49,408][626795] Updated weights for policy 0, policy_version 230112 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:51,407][626795] Updated weights for policy 0, policy_version 230122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:53,188][626795] Updated weights for policy 0, policy_version 230132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:53,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42599.2, 300 sec: 42654.0). Total num frames: 1885265920. Throughput: 0: 10667.9. Samples: 221314968. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:53,976][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:55,233][626795] Updated weights for policy 0, policy_version 230142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:57,189][626795] Updated weights for policy 0, policy_version 230152 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42735.1, 300 sec: 42653.9). Total num frames: 1885478912. Throughput: 0: 10666.5. Samples: 221346240. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:03:58,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:03:59,097][626795] Updated weights for policy 0, policy_version 230162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:01,046][626795] Updated weights for policy 0, policy_version 230172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:02,890][626795] Updated weights for policy 0, policy_version 230182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:03,977][24592] Fps is (10 sec: 42593.4, 60 sec: 42597.6, 300 sec: 42653.8). Total num frames: 1885691904. Throughput: 0: 10650.0. Samples: 221410212. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:03,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:04,894][626795] Updated weights for policy 0, policy_version 230192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:06,847][626795] Updated weights for policy 0, policy_version 230202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:08,716][626795] Updated weights for policy 0, policy_version 230212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42598.4, 300 sec: 42653.9). Total num frames: 1885904896. Throughput: 0: 10641.6. Samples: 221473434. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:08,977][24592] Avg episode reward: [(0, '4.925')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:10,654][626795] Updated weights for policy 0, policy_version 230222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:12,553][626795] Updated weights for policy 0, policy_version 230232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:13,976][24592] Fps is (10 sec: 42599.9, 60 sec: 42734.4, 300 sec: 42626.1). Total num frames: 1886117888. Throughput: 0: 10650.4. Samples: 221505588. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:13,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:14,535][626795] Updated weights for policy 0, policy_version 230242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:16,333][626795] Updated weights for policy 0, policy_version 230252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:18,335][626795] Updated weights for policy 0, policy_version 230262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:18,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42734.9, 300 sec: 42626.2). Total num frames: 1886330880. Throughput: 0: 10659.7. Samples: 221570130. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:18,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:20,223][626795] Updated weights for policy 0, policy_version 230272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:22,136][626795] Updated weights for policy 0, policy_version 230282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:23,975][24592] Fps is (10 sec: 42601.7, 60 sec: 42735.2, 300 sec: 42653.9). Total num frames: 1886543872. Throughput: 0: 10673.8. Samples: 221635074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:23,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:24,048][626795] Updated weights for policy 0, policy_version 230292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:25,999][626795] Updated weights for policy 0, policy_version 230302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:27,775][626795] Updated weights for policy 0, policy_version 230312 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:28,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42598.4, 300 sec: 42654.0). Total num frames: 1886756864. Throughput: 0: 10652.5. Samples: 221666598. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:28,976][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:29,781][626795] Updated weights for policy 0, policy_version 230322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:31,667][626795] Updated weights for policy 0, policy_version 230332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:33,624][626795] Updated weights for policy 0, policy_version 230342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:33,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42598.4, 300 sec: 42653.9). Total num frames: 1886969856. Throughput: 0: 10669.3. Samples: 221731308. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:33,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:35,508][626795] Updated weights for policy 0, policy_version 230352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:37,367][626795] Updated weights for policy 0, policy_version 230362 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:38,975][24592] Fps is (10 sec: 43417.8, 60 sec: 42735.6, 300 sec: 42681.7). Total num frames: 1887191040. Throughput: 0: 10683.7. Samples: 221795736. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:38,976][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:39,264][626795] Updated weights for policy 0, policy_version 230372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:41,294][626795] Updated weights for policy 0, policy_version 230382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:43,138][626795] Updated weights for policy 0, policy_version 230392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:43,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42734.9, 300 sec: 42681.7). Total num frames: 1887404032. Throughput: 0: 10701.1. Samples: 221827788. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:43,977][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:44,957][626795] Updated weights for policy 0, policy_version 230402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:46,911][626795] Updated weights for policy 0, policy_version 230412 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:48,770][626795] Updated weights for policy 0, policy_version 230422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:48,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42735.0, 300 sec: 42681.7). Total num frames: 1887617024. Throughput: 0: 10719.5. Samples: 221892576. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:48,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:50,717][626795] Updated weights for policy 0, policy_version 230432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:52,567][626795] Updated weights for policy 0, policy_version 230442 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:53,975][24592] Fps is (10 sec: 43417.9, 60 sec: 42871.5, 300 sec: 42709.5). Total num frames: 1887838208. Throughput: 0: 10754.8. Samples: 221957400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:53,976][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:54,537][626795] Updated weights for policy 0, policy_version 230452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:56,352][626795] Updated weights for policy 0, policy_version 230462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:04:58,317][626795] Updated weights for policy 0, policy_version 230472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:58,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42871.5, 300 sec: 42709.5). Total num frames: 1888051200. Throughput: 0: 10746.3. Samples: 221989164. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:04:58,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:00,192][626795] Updated weights for policy 0, policy_version 230482 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:02,227][626795] Updated weights for policy 0, policy_version 230492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:03,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42872.3, 300 sec: 42681.7). Total num frames: 1888264192. Throughput: 0: 10729.3. Samples: 222052950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:03,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000230501_1888264192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000229250_1878016000.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:04,147][626795] Updated weights for policy 0, policy_version 230502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:06,058][626795] Updated weights for policy 0, policy_version 230512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:07,880][626795] Updated weights for policy 0, policy_version 230522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42871.5, 300 sec: 42709.5). Total num frames: 1888477184. Throughput: 0: 10718.8. Samples: 222117420. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:08,977][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:09,940][626795] Updated weights for policy 0, policy_version 230532 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:11,705][626795] Updated weights for policy 0, policy_version 230542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:13,673][626795] Updated weights for policy 0, policy_version 230552 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:13,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42872.0, 300 sec: 42709.5). Total num frames: 1888690176. Throughput: 0: 10731.9. Samples: 222149532. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:13,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:15,596][626795] Updated weights for policy 0, policy_version 230562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:17,471][626795] Updated weights for policy 0, policy_version 230572 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42871.5, 300 sec: 42709.5). Total num frames: 1888903168. Throughput: 0: 10730.2. Samples: 222214164. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:18,977][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:19,407][626795] Updated weights for policy 0, policy_version 230582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:21,331][626795] Updated weights for policy 0, policy_version 230592 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:23,177][626795] Updated weights for policy 0, policy_version 230602 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:23,975][24592] Fps is (10 sec: 43417.6, 60 sec: 43008.0, 300 sec: 42737.2). Total num frames: 1889124352. Throughput: 0: 10722.7. Samples: 222278256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:23,977][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:25,176][626795] Updated weights for policy 0, policy_version 230612 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:27,142][626795] Updated weights for policy 0, policy_version 230622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:28,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42871.5, 300 sec: 42709.5). Total num frames: 1889329152. Throughput: 0: 10713.6. Samples: 222309900. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:28,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:29,144][626795] Updated weights for policy 0, policy_version 230632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:31,020][626795] Updated weights for policy 0, policy_version 230642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:33,042][626795] Updated weights for policy 0, policy_version 230652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:33,976][24592] Fps is (10 sec: 41778.2, 60 sec: 42871.3, 300 sec: 42709.4). Total num frames: 1889542144. Throughput: 0: 10664.6. Samples: 222372486. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:33,979][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:34,936][626795] Updated weights for policy 0, policy_version 230662 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:36,907][626795] Updated weights for policy 0, policy_version 230672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:38,849][626795] Updated weights for policy 0, policy_version 230682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:38,976][24592] Fps is (10 sec: 42596.3, 60 sec: 42734.6, 300 sec: 42709.5). Total num frames: 1889755136. Throughput: 0: 10638.3. Samples: 222436128. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:38,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:40,655][626795] Updated weights for policy 0, policy_version 230692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:42,609][626795] Updated weights for policy 0, policy_version 230702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:43,975][24592] Fps is (10 sec: 42599.5, 60 sec: 42735.0, 300 sec: 42709.5). Total num frames: 1889968128. Throughput: 0: 10639.5. Samples: 222467940. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:43,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:44,562][626795] Updated weights for policy 0, policy_version 230712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:46,419][626795] Updated weights for policy 0, policy_version 230722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:48,383][626795] Updated weights for policy 0, policy_version 230732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:48,976][24592] Fps is (10 sec: 41780.4, 60 sec: 42598.2, 300 sec: 42711.7). Total num frames: 1890172928. Throughput: 0: 10662.9. Samples: 222532782. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:48,976][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:50,447][626795] Updated weights for policy 0, policy_version 230742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:52,353][626795] Updated weights for policy 0, policy_version 230752 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42461.9, 300 sec: 42681.8). Total num frames: 1890385920. Throughput: 0: 10614.9. Samples: 222595092. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:53,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:54,184][626795] Updated weights for policy 0, policy_version 230762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:56,178][626795] Updated weights for policy 0, policy_version 230772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:05:58,051][626795] Updated weights for policy 0, policy_version 230782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:58,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42461.8, 300 sec: 42681.7). Total num frames: 1890598912. Throughput: 0: 10606.9. Samples: 222626844. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:05:58,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:00,033][626795] Updated weights for policy 0, policy_version 230792 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:02,057][626795] Updated weights for policy 0, policy_version 230802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:03,789][626795] Updated weights for policy 0, policy_version 230812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:03,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42461.9, 300 sec: 42681.7). Total num frames: 1890811904. Throughput: 0: 10576.8. Samples: 222690120. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:03,977][24592] Avg episode reward: [(0, '4.931')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:05,748][626795] Updated weights for policy 0, policy_version 230822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:07,791][626795] Updated weights for policy 0, policy_version 230832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:08,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42461.6, 300 sec: 42681.7). Total num frames: 1891024896. Throughput: 0: 10594.5. Samples: 222755010. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:08,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:09,787][626795] Updated weights for policy 0, policy_version 230842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:11,479][626795] Updated weights for policy 0, policy_version 230852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:13,526][626795] Updated weights for policy 0, policy_version 230862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:13,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42461.9, 300 sec: 42681.7). Total num frames: 1891237888. Throughput: 0: 10588.4. Samples: 222786378. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:13,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:15,362][626795] Updated weights for policy 0, policy_version 230872 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:17,265][626795] Updated weights for policy 0, policy_version 230882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:18,975][24592] Fps is (10 sec: 43419.0, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1891459072. Throughput: 0: 10636.7. Samples: 222851136. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:18,977][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:19,156][626795] Updated weights for policy 0, policy_version 230892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:21,026][626795] Updated weights for policy 0, policy_version 230902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:22,945][626795] Updated weights for policy 0, policy_version 230912 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:23,976][24592] Fps is (10 sec: 43416.2, 60 sec: 42461.6, 300 sec: 42709.4). Total num frames: 1891672064. Throughput: 0: 10652.6. Samples: 222915492. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:23,978][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:24,926][626795] Updated weights for policy 0, policy_version 230922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:26,858][626795] Updated weights for policy 0, policy_version 230932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:28,651][626795] Updated weights for policy 0, policy_version 230942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1891885056. Throughput: 0: 10663.3. Samples: 222947790. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:28,976][24592] Avg episode reward: [(0, '4.950')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:30,722][626795] Updated weights for policy 0, policy_version 230952 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:32,552][626795] Updated weights for policy 0, policy_version 230962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:33,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42598.6, 300 sec: 42709.5). Total num frames: 1892098048. Throughput: 0: 10641.1. Samples: 223011630. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:33,978][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:34,435][626795] Updated weights for policy 0, policy_version 230972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:36,427][626795] Updated weights for policy 0, policy_version 230982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:38,301][626795] Updated weights for policy 0, policy_version 230992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:38,976][24592] Fps is (10 sec: 42594.3, 60 sec: 42598.1, 300 sec: 42709.4). Total num frames: 1892311040. Throughput: 0: 10684.3. Samples: 223075896. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:38,980][24592] Avg episode reward: [(0, '5.015')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:40,200][626795] Updated weights for policy 0, policy_version 231002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:40,897][626772] Signal inference workers to stop experience collection... (3000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:40,897][626772] Signal inference workers to resume experience collection... (3000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:40,909][626795] InferenceWorker_p0-w0: stopping experience collection (3000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:40,912][626795] InferenceWorker_p0-w0: resuming experience collection (3000 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:42,195][626795] Updated weights for policy 0, policy_version 231012 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:43,977][24592] Fps is (10 sec: 42590.6, 60 sec: 42597.1, 300 sec: 42709.2). Total num frames: 1892524032. Throughput: 0: 10681.4. Samples: 223107528. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:43,979][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:44,073][626795] Updated weights for policy 0, policy_version 231022 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:45,978][626795] Updated weights for policy 0, policy_version 231032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:47,940][626795] Updated weights for policy 0, policy_version 231042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:48,975][24592] Fps is (10 sec: 42602.6, 60 sec: 42735.1, 300 sec: 42681.7). Total num frames: 1892737024. Throughput: 0: 10704.3. Samples: 223171812. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:48,978][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:49,878][626795] Updated weights for policy 0, policy_version 231052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:51,663][626795] Updated weights for policy 0, policy_version 231062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:53,609][626795] Updated weights for policy 0, policy_version 231072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:53,975][24592] Fps is (10 sec: 43425.7, 60 sec: 42871.5, 300 sec: 42737.3). Total num frames: 1892958208. Throughput: 0: 10696.1. Samples: 223236330. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:53,978][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:55,550][626795] Updated weights for policy 0, policy_version 231082 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:57,415][626795] Updated weights for policy 0, policy_version 231092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:58,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42871.5, 300 sec: 42737.3). Total num frames: 1893171200. Throughput: 0: 10705.5. Samples: 223268124. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:06:58,977][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:06:59,340][626795] Updated weights for policy 0, policy_version 231102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:01,452][626795] Updated weights for policy 0, policy_version 231112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:03,106][626795] Updated weights for policy 0, policy_version 231122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:03,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42871.5, 300 sec: 42737.2). Total num frames: 1893384192. Throughput: 0: 10699.1. Samples: 223332594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:03,976][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000231126_1893384192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:04,093][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000229875_1883136000.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:05,287][626795] Updated weights for policy 0, policy_version 231132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:07,128][626795] Updated weights for policy 0, policy_version 231142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42735.1, 300 sec: 42709.6). Total num frames: 1893588992. Throughput: 0: 10674.9. Samples: 223395858. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:08,977][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:08,987][626795] Updated weights for policy 0, policy_version 231152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:11,031][626795] Updated weights for policy 0, policy_version 231162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:12,906][626795] Updated weights for policy 0, policy_version 231172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42734.9, 300 sec: 42709.5). Total num frames: 1893801984. Throughput: 0: 10664.4. Samples: 223427688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:13,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:14,876][626795] Updated weights for policy 0, policy_version 231182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:16,724][626795] Updated weights for policy 0, policy_version 231192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:18,566][626795] Updated weights for policy 0, policy_version 231202 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:18,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1894014976. Throughput: 0: 10671.7. Samples: 223491858. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:18,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:20,573][626795] Updated weights for policy 0, policy_version 231212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:22,439][626795] Updated weights for policy 0, policy_version 231222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:23,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42735.2, 300 sec: 42737.2). Total num frames: 1894236160. Throughput: 0: 10676.6. Samples: 223556334. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:23,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:24,342][626795] Updated weights for policy 0, policy_version 231232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:26,299][626795] Updated weights for policy 0, policy_version 231242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:28,147][626795] Updated weights for policy 0, policy_version 231252 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:28,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42734.9, 300 sec: 42709.5). Total num frames: 1894449152. Throughput: 0: 10682.3. Samples: 223588212. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:28,977][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:30,128][626795] Updated weights for policy 0, policy_version 231262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:32,036][626795] Updated weights for policy 0, policy_version 231272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:33,949][626795] Updated weights for policy 0, policy_version 231282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:33,976][24592] Fps is (10 sec: 42597.6, 60 sec: 42734.8, 300 sec: 42709.5). Total num frames: 1894662144. Throughput: 0: 10681.4. Samples: 223652478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:33,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:35,872][626795] Updated weights for policy 0, policy_version 231292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:37,812][626795] Updated weights for policy 0, policy_version 231302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:38,975][24592] Fps is (10 sec: 42597.8, 60 sec: 42735.5, 300 sec: 42709.5). Total num frames: 1894875136. Throughput: 0: 10680.1. Samples: 223716936. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:38,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:39,737][626795] Updated weights for policy 0, policy_version 231312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:41,618][626795] Updated weights for policy 0, policy_version 231322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:43,557][626795] Updated weights for policy 0, policy_version 231332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:43,976][24592] Fps is (10 sec: 42598.5, 60 sec: 42736.1, 300 sec: 42709.5). Total num frames: 1895088128. Throughput: 0: 10676.4. Samples: 223748562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:43,976][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:45,468][626795] Updated weights for policy 0, policy_version 231342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:47,421][626795] Updated weights for policy 0, policy_version 231352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:48,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42734.9, 300 sec: 42681.9). Total num frames: 1895301120. Throughput: 0: 10657.9. Samples: 223812198. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:48,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:49,357][626795] Updated weights for policy 0, policy_version 231362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:51,317][626795] Updated weights for policy 0, policy_version 231372 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:53,047][626795] Updated weights for policy 0, policy_version 231382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:53,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42598.4, 300 sec: 42709.5). Total num frames: 1895514112. Throughput: 0: 10683.6. Samples: 223876620. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:53,976][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:55,108][626795] Updated weights for policy 0, policy_version 231392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:57,092][626795] Updated weights for policy 0, policy_version 231402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42461.9, 300 sec: 42653.9). Total num frames: 1895718912. Throughput: 0: 10662.0. Samples: 223907478. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:07:58,976][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:07:59,023][626795] Updated weights for policy 0, policy_version 231412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:00,926][626795] Updated weights for policy 0, policy_version 231422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:02,899][626795] Updated weights for policy 0, policy_version 231432 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:03,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42681.7). Total num frames: 1895940096. Throughput: 0: 10665.5. Samples: 223971804. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:03,976][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:04,686][626795] Updated weights for policy 0, policy_version 231442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:06,612][626795] Updated weights for policy 0, policy_version 231452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:08,636][626795] Updated weights for policy 0, policy_version 231462 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:08,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42734.9, 300 sec: 42709.5). Total num frames: 1896153088. Throughput: 0: 10644.4. Samples: 224035332. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:08,976][24592] Avg episode reward: [(0, '5.072')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:10,578][626795] Updated weights for policy 0, policy_version 231472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:12,451][626795] Updated weights for policy 0, policy_version 231482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:13,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42734.9, 300 sec: 42709.5). Total num frames: 1896366080. Throughput: 0: 10636.7. Samples: 224066862. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:13,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:14,415][626795] Updated weights for policy 0, policy_version 231492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:16,249][626795] Updated weights for policy 0, policy_version 231502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:18,187][626795] Updated weights for policy 0, policy_version 231512 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42598.4, 300 sec: 42681.8). Total num frames: 1896570880. Throughput: 0: 10636.9. Samples: 224131134. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:18,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:20,132][626795] Updated weights for policy 0, policy_version 231522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:22,074][626795] Updated weights for policy 0, policy_version 231532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:23,957][626795] Updated weights for policy 0, policy_version 231542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:23,976][24592] Fps is (10 sec: 42596.7, 60 sec: 42598.1, 300 sec: 42681.7). Total num frames: 1896792064. Throughput: 0: 10628.9. Samples: 224195238. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:23,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:25,939][626795] Updated weights for policy 0, policy_version 231552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:27,764][626795] Updated weights for policy 0, policy_version 231562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42461.9, 300 sec: 42653.9). Total num frames: 1896996864. Throughput: 0: 10619.6. Samples: 224226444. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:28,976][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:29,832][626795] Updated weights for policy 0, policy_version 231572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:31,711][626795] Updated weights for policy 0, policy_version 231582 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:33,770][626795] Updated weights for policy 0, policy_version 231592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:33,975][24592] Fps is (10 sec: 41780.6, 60 sec: 42462.0, 300 sec: 42654.1). Total num frames: 1897209856. Throughput: 0: 10624.8. Samples: 224290314. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:33,977][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:35,556][626795] Updated weights for policy 0, policy_version 231602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:37,535][626795] Updated weights for policy 0, policy_version 231612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:38,976][24592] Fps is (10 sec: 42595.7, 60 sec: 42461.5, 300 sec: 42653.8). Total num frames: 1897422848. Throughput: 0: 10606.0. Samples: 224353896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:38,979][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:39,520][626795] Updated weights for policy 0, policy_version 231622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:41,389][626795] Updated weights for policy 0, policy_version 231632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:43,339][626795] Updated weights for policy 0, policy_version 231642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42462.0, 300 sec: 42653.9). Total num frames: 1897635840. Throughput: 0: 10627.5. Samples: 224385714. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:43,977][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:45,214][626795] Updated weights for policy 0, policy_version 231652 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:47,211][626795] Updated weights for policy 0, policy_version 231662 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:48,975][24592] Fps is (10 sec: 42600.6, 60 sec: 42461.8, 300 sec: 42653.9). Total num frames: 1897848832. Throughput: 0: 10602.4. Samples: 224448912. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:48,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:49,128][626795] Updated weights for policy 0, policy_version 231672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:50,997][626795] Updated weights for policy 0, policy_version 231682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:52,949][626795] Updated weights for policy 0, policy_version 231692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:53,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42461.6, 300 sec: 42653.9). Total num frames: 1898061824. Throughput: 0: 10629.8. Samples: 224513676. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:53,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:54,830][626795] Updated weights for policy 0, policy_version 231702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:56,896][626795] Updated weights for policy 0, policy_version 231712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:08:58,518][626795] Updated weights for policy 0, policy_version 231722 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42598.4, 300 sec: 42654.1). Total num frames: 1898274816. Throughput: 0: 10635.2. Samples: 224545446. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:08:58,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:00,659][626795] Updated weights for policy 0, policy_version 231732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:02,627][626795] Updated weights for policy 0, policy_version 231742 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:03,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42461.8, 300 sec: 42653.9). Total num frames: 1898487808. Throughput: 0: 10617.8. Samples: 224608938. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:03,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000231749_1898487808.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:04,046][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000230501_1888264192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:04,606][626795] Updated weights for policy 0, policy_version 231752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:06,529][626795] Updated weights for policy 0, policy_version 231762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:08,474][626795] Updated weights for policy 0, policy_version 231772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:08,976][24592] Fps is (10 sec: 41777.5, 60 sec: 42325.0, 300 sec: 42626.2). Total num frames: 1898692608. Throughput: 0: 10594.0. Samples: 224671968. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:08,977][24592] Avg episode reward: [(0, '4.477')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:10,408][626795] Updated weights for policy 0, policy_version 231782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:12,343][626795] Updated weights for policy 0, policy_version 231792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:13,976][24592] Fps is (10 sec: 41776.8, 60 sec: 42324.9, 300 sec: 42626.1). Total num frames: 1898905600. Throughput: 0: 10601.5. Samples: 224703516. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:13,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:14,268][626795] Updated weights for policy 0, policy_version 231802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:16,225][626795] Updated weights for policy 0, policy_version 231812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:18,167][626795] Updated weights for policy 0, policy_version 231822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:18,975][24592] Fps is (10 sec: 42600.6, 60 sec: 42461.9, 300 sec: 42626.2). Total num frames: 1899118592. Throughput: 0: 10590.8. Samples: 224766900. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:18,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:20,025][626795] Updated weights for policy 0, policy_version 231832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:22,002][626795] Updated weights for policy 0, policy_version 231842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:23,855][626795] Updated weights for policy 0, policy_version 231852 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:23,975][24592] Fps is (10 sec: 42600.6, 60 sec: 42325.6, 300 sec: 42626.2). Total num frames: 1899331584. Throughput: 0: 10609.7. Samples: 224831328. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:23,976][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:25,792][626795] Updated weights for policy 0, policy_version 231862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:27,775][626795] Updated weights for policy 0, policy_version 231872 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:28,976][24592] Fps is (10 sec: 42597.7, 60 sec: 42461.8, 300 sec: 42626.2). Total num frames: 1899544576. Throughput: 0: 10608.6. Samples: 224863104. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:28,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:29,634][626795] Updated weights for policy 0, policy_version 231882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:31,609][626795] Updated weights for policy 0, policy_version 231892 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:33,453][626795] Updated weights for policy 0, policy_version 231902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:33,976][24592] Fps is (10 sec: 42596.3, 60 sec: 42461.5, 300 sec: 42598.3). Total num frames: 1899757568. Throughput: 0: 10625.6. Samples: 224927070. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:33,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:35,459][626795] Updated weights for policy 0, policy_version 231912 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:37,320][626795] Updated weights for policy 0, policy_version 231922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:38,976][24592] Fps is (10 sec: 42598.3, 60 sec: 42462.2, 300 sec: 42598.4). Total num frames: 1899970560. Throughput: 0: 10604.3. Samples: 224990868. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:38,978][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:39,329][626795] Updated weights for policy 0, policy_version 231932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:41,167][626795] Updated weights for policy 0, policy_version 231942 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:43,120][626795] Updated weights for policy 0, policy_version 231952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:43,975][24592] Fps is (10 sec: 42600.8, 60 sec: 42461.9, 300 sec: 42598.4). Total num frames: 1900183552. Throughput: 0: 10603.6. Samples: 225022608. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:43,976][24592] Avg episode reward: [(0, '4.909')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:45,015][626795] Updated weights for policy 0, policy_version 231962 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:47,033][626795] Updated weights for policy 0, policy_version 231972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:48,889][626795] Updated weights for policy 0, policy_version 231982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:48,976][24592] Fps is (10 sec: 42598.5, 60 sec: 42461.8, 300 sec: 42570.6). Total num frames: 1900396544. Throughput: 0: 10620.1. Samples: 225086844. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:48,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:50,866][626795] Updated weights for policy 0, policy_version 231992 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:52,781][626795] Updated weights for policy 0, policy_version 232002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:53,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42462.1, 300 sec: 42570.6). Total num frames: 1900609536. Throughput: 0: 10632.5. Samples: 225150426. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:53,977][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:54,612][626795] Updated weights for policy 0, policy_version 232012 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:56,539][626795] Updated weights for policy 0, policy_version 232022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:09:58,564][626795] Updated weights for policy 0, policy_version 232032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:58,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42461.9, 300 sec: 42570.6). Total num frames: 1900822528. Throughput: 0: 10640.8. Samples: 225182346. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:09:58,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:00,413][626795] Updated weights for policy 0, policy_version 232042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:02,370][626795] Updated weights for policy 0, policy_version 232052 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:03,976][24592] Fps is (10 sec: 42596.4, 60 sec: 42461.6, 300 sec: 42570.6). Total num frames: 1901035520. Throughput: 0: 10651.9. Samples: 225246240. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:03,981][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:04,329][626795] Updated weights for policy 0, policy_version 232062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:06,165][626795] Updated weights for policy 0, policy_version 232072 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:08,117][626795] Updated weights for policy 0, policy_version 232082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.8, 300 sec: 42570.6). Total num frames: 1901248512. Throughput: 0: 10639.0. Samples: 225310080. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:08,976][24592] Avg episode reward: [(0, '4.367')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:09,989][626795] Updated weights for policy 0, policy_version 232092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:11,948][626795] Updated weights for policy 0, policy_version 232102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:13,863][626795] Updated weights for policy 0, policy_version 232112 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:13,976][24592] Fps is (10 sec: 42599.4, 60 sec: 42598.7, 300 sec: 42570.6). Total num frames: 1901461504. Throughput: 0: 10641.3. Samples: 225341964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:13,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:15,952][626795] Updated weights for policy 0, policy_version 232122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:17,838][626795] Updated weights for policy 0, policy_version 232132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42461.8, 300 sec: 42515.1). Total num frames: 1901666304. Throughput: 0: 10630.1. Samples: 225405420. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:18,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:19,774][626795] Updated weights for policy 0, policy_version 232142 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:21,693][626795] Updated weights for policy 0, policy_version 232152 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:23,580][626795] Updated weights for policy 0, policy_version 232162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:23,975][24592] Fps is (10 sec: 41780.2, 60 sec: 42461.9, 300 sec: 42542.9). Total num frames: 1901879296. Throughput: 0: 10628.4. Samples: 225469146. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:23,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:25,587][626795] Updated weights for policy 0, policy_version 232172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:27,354][626795] Updated weights for policy 0, policy_version 232182 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:28,976][24592] Fps is (10 sec: 43416.7, 60 sec: 42598.3, 300 sec: 42570.6). Total num frames: 1902100480. Throughput: 0: 10630.9. Samples: 225501000. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:28,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:29,348][626795] Updated weights for policy 0, policy_version 232192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:31,140][626795] Updated weights for policy 0, policy_version 232202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:33,237][626795] Updated weights for policy 0, policy_version 232212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:33,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42462.3, 300 sec: 42542.9). Total num frames: 1902305280. Throughput: 0: 10620.6. Samples: 225564768. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:33,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:35,001][626795] Updated weights for policy 0, policy_version 232222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:36,937][626795] Updated weights for policy 0, policy_version 232232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:38,897][626795] Updated weights for policy 0, policy_version 232242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:38,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42598.5, 300 sec: 42570.6). Total num frames: 1902526464. Throughput: 0: 10649.4. Samples: 225629652. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:38,976][24592] Avg episode reward: [(0, '5.037')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:40,786][626795] Updated weights for policy 0, policy_version 232252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:42,726][626795] Updated weights for policy 0, policy_version 232262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:43,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 1902739456. Throughput: 0: 10654.0. Samples: 225661776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:43,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:44,678][626795] Updated weights for policy 0, policy_version 232272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:46,616][626795] Updated weights for policy 0, policy_version 232282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:48,554][626795] Updated weights for policy 0, policy_version 232292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:48,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42598.5, 300 sec: 42598.4). Total num frames: 1902952448. Throughput: 0: 10649.3. Samples: 225725454. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:48,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:50,425][626795] Updated weights for policy 0, policy_version 232302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:52,396][626795] Updated weights for policy 0, policy_version 232312 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:53,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42598.3, 300 sec: 42598.4). Total num frames: 1903165440. Throughput: 0: 10644.1. Samples: 225789066. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:53,978][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:54,309][626795] Updated weights for policy 0, policy_version 232322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:56,263][626795] Updated weights for policy 0, policy_version 232332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:10:58,239][626795] Updated weights for policy 0, policy_version 232342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42461.9, 300 sec: 42570.6). Total num frames: 1903370240. Throughput: 0: 10634.7. Samples: 225820524. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:10:58,976][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:00,241][626795] Updated weights for policy 0, policy_version 232352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:02,299][626795] Updated weights for policy 0, policy_version 232362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:03,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42462.2, 300 sec: 42570.7). Total num frames: 1903583232. Throughput: 0: 10600.4. Samples: 225882438. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:03,977][24592] Avg episode reward: [(0, '4.911')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000232371_1903583232.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:04,060][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000231126_1893384192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:04,167][626795] Updated weights for policy 0, policy_version 232372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:06,207][626795] Updated weights for policy 0, policy_version 232382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:08,006][626795] Updated weights for policy 0, policy_version 232392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:08,976][24592] Fps is (10 sec: 41778.2, 60 sec: 42325.2, 300 sec: 42542.8). Total num frames: 1903788032. Throughput: 0: 10601.8. Samples: 225946230. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:08,977][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:10,052][626795] Updated weights for policy 0, policy_version 232402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:11,829][626795] Updated weights for policy 0, policy_version 232412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:13,785][626795] Updated weights for policy 0, policy_version 232422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:13,976][24592] Fps is (10 sec: 42596.6, 60 sec: 42461.7, 300 sec: 42542.8). Total num frames: 1904009216. Throughput: 0: 10607.3. Samples: 225978330. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:13,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:15,709][626795] Updated weights for policy 0, policy_version 232432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:17,643][626795] Updated weights for policy 0, policy_version 232442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:18,976][24592] Fps is (10 sec: 42596.5, 60 sec: 42461.4, 300 sec: 42515.0). Total num frames: 1904214016. Throughput: 0: 10609.8. Samples: 226042218. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:18,978][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:19,559][626795] Updated weights for policy 0, policy_version 232452 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:21,449][626795] Updated weights for policy 0, policy_version 232462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:23,354][626795] Updated weights for policy 0, policy_version 232472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:23,975][24592] Fps is (10 sec: 41780.9, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1904427008. Throughput: 0: 10591.1. Samples: 226106250. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:23,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:25,272][626795] Updated weights for policy 0, policy_version 232482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:27,237][626795] Updated weights for policy 0, policy_version 232492 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:28,975][24592] Fps is (10 sec: 43420.5, 60 sec: 42462.0, 300 sec: 42542.9). Total num frames: 1904648192. Throughput: 0: 10594.9. Samples: 226138548. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:28,976][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:29,108][626795] Updated weights for policy 0, policy_version 232502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:31,022][626795] Updated weights for policy 0, policy_version 232512 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:33,006][626795] Updated weights for policy 0, policy_version 232522 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:33,976][24592] Fps is (10 sec: 43417.1, 60 sec: 42598.3, 300 sec: 42543.0). Total num frames: 1904861184. Throughput: 0: 10606.4. Samples: 226202742. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:33,977][24592] Avg episode reward: [(0, '4.910')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:34,819][626795] Updated weights for policy 0, policy_version 232532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:36,718][626795] Updated weights for policy 0, policy_version 232542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:38,648][626795] Updated weights for policy 0, policy_version 232552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:38,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42461.9, 300 sec: 42543.1). Total num frames: 1905074176. Throughput: 0: 10621.2. Samples: 226267020. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:38,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:40,517][626795] Updated weights for policy 0, policy_version 232562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:42,386][626795] Updated weights for policy 0, policy_version 232572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:43,975][24592] Fps is (10 sec: 43418.2, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1905295360. Throughput: 0: 10640.1. Samples: 226299330. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:43,976][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:44,421][626795] Updated weights for policy 0, policy_version 232582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:46,240][626795] Updated weights for policy 0, policy_version 232592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:48,274][626795] Updated weights for policy 0, policy_version 232602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:48,976][24592] Fps is (10 sec: 42596.6, 60 sec: 42461.5, 300 sec: 42515.0). Total num frames: 1905500160. Throughput: 0: 10697.6. Samples: 226363836. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:48,978][24592] Avg episode reward: [(0, '4.915')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:50,140][626795] Updated weights for policy 0, policy_version 232612 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:52,032][626795] Updated weights for policy 0, policy_version 232622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:53,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1905721344. Throughput: 0: 10715.1. Samples: 226428408. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:53,976][626795] Updated weights for policy 0, policy_version 232632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:53,977][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:55,852][626795] Updated weights for policy 0, policy_version 232642 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:57,627][626795] Updated weights for policy 0, policy_version 232652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:58,975][24592] Fps is (10 sec: 44238.9, 60 sec: 42871.5, 300 sec: 42570.6). Total num frames: 1905942528. Throughput: 0: 10709.0. Samples: 226460232. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:11:58,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:11:59,625][626795] Updated weights for policy 0, policy_version 232662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:01,544][626795] Updated weights for policy 0, policy_version 232672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:03,492][626795] Updated weights for policy 0, policy_version 232682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:03,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42871.5, 300 sec: 42598.4). Total num frames: 1906155520. Throughput: 0: 10733.4. Samples: 226525212. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:03,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:05,438][626795] Updated weights for policy 0, policy_version 232692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:07,314][626795] Updated weights for policy 0, policy_version 232702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:08,975][24592] Fps is (10 sec: 42598.1, 60 sec: 43008.1, 300 sec: 42598.4). Total num frames: 1906368512. Throughput: 0: 10726.9. Samples: 226588962. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:08,977][24592] Avg episode reward: [(0, '4.418')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:09,300][626795] Updated weights for policy 0, policy_version 232712 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:11,081][626795] Updated weights for policy 0, policy_version 232722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:13,118][626795] Updated weights for policy 0, policy_version 232732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:13,976][24592] Fps is (10 sec: 41775.2, 60 sec: 42734.5, 300 sec: 42570.5). Total num frames: 1906573312. Throughput: 0: 10714.0. Samples: 226620690. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:13,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:15,001][626795] Updated weights for policy 0, policy_version 232742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:16,892][626795] Updated weights for policy 0, policy_version 232752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:18,798][626795] Updated weights for policy 0, policy_version 232762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:18,975][24592] Fps is (10 sec: 42598.7, 60 sec: 43008.5, 300 sec: 42570.6). Total num frames: 1906794496. Throughput: 0: 10715.1. Samples: 226684920. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:18,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:20,733][626795] Updated weights for policy 0, policy_version 232772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:22,565][626795] Updated weights for policy 0, policy_version 232782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:23,975][24592] Fps is (10 sec: 43421.7, 60 sec: 43008.0, 300 sec: 42570.6). Total num frames: 1907007488. Throughput: 0: 10720.3. Samples: 226749432. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:23,976][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:24,548][626795] Updated weights for policy 0, policy_version 232792 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:26,535][626795] Updated weights for policy 0, policy_version 232802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:28,340][626795] Updated weights for policy 0, policy_version 232812 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:28,976][24592] Fps is (10 sec: 42598.2, 60 sec: 42871.4, 300 sec: 42570.7). Total num frames: 1907220480. Throughput: 0: 10705.5. Samples: 226781076. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:28,978][24592] Avg episode reward: [(0, '4.964')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:30,261][626795] Updated weights for policy 0, policy_version 232822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:32,222][626795] Updated weights for policy 0, policy_version 232832 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:33,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42871.6, 300 sec: 42570.6). Total num frames: 1907433472. Throughput: 0: 10704.4. Samples: 226845528. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:33,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:34,153][626795] Updated weights for policy 0, policy_version 232842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:36,108][626795] Updated weights for policy 0, policy_version 232852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:37,870][626795] Updated weights for policy 0, policy_version 232862 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42871.5, 300 sec: 42570.7). Total num frames: 1907646464. Throughput: 0: 10694.7. Samples: 226909668. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:38,976][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:39,878][626795] Updated weights for policy 0, policy_version 232872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:41,829][626795] Updated weights for policy 0, policy_version 232882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:43,609][626795] Updated weights for policy 0, policy_version 232892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:43,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42734.7, 300 sec: 42570.6). Total num frames: 1907859456. Throughput: 0: 10699.1. Samples: 226941696. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:43,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:45,543][626795] Updated weights for policy 0, policy_version 232902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:47,658][626795] Updated weights for policy 0, policy_version 232912 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:48,976][24592] Fps is (10 sec: 42597.1, 60 sec: 42871.6, 300 sec: 42570.6). Total num frames: 1908072448. Throughput: 0: 10669.3. Samples: 227005332. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:48,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:49,424][626795] Updated weights for policy 0, policy_version 232922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:51,407][626795] Updated weights for policy 0, policy_version 232932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:53,346][626795] Updated weights for policy 0, policy_version 232942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:53,975][24592] Fps is (10 sec: 42600.1, 60 sec: 42735.0, 300 sec: 42598.4). Total num frames: 1908285440. Throughput: 0: 10685.6. Samples: 227069814. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:53,976][24592] Avg episode reward: [(0, '4.974')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:55,179][626795] Updated weights for policy 0, policy_version 232952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:56,463][626772] Signal inference workers to stop experience collection... (3050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:56,463][626772] Signal inference workers to resume experience collection... (3050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:56,476][626795] InferenceWorker_p0-w0: stopping experience collection (3050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:56,482][626795] InferenceWorker_p0-w0: resuming experience collection (3050 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:57,204][626795] Updated weights for policy 0, policy_version 232962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:58,975][24592] Fps is (10 sec: 42599.8, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1908498432. Throughput: 0: 10677.2. Samples: 227101152. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:12:58,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:12:59,197][626795] Updated weights for policy 0, policy_version 232972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:00,994][626795] Updated weights for policy 0, policy_version 232982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:02,957][626795] Updated weights for policy 0, policy_version 232992 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:03,975][24592] Fps is (10 sec: 42597.9, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1908711424. Throughput: 0: 10669.2. Samples: 227165034. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:03,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000232997_1908711424.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:04,056][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000231749_1898487808.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:05,042][626795] Updated weights for policy 0, policy_version 233002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:06,791][626795] Updated weights for policy 0, policy_version 233012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:08,817][626795] Updated weights for policy 0, policy_version 233022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:08,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1908924416. Throughput: 0: 10638.5. Samples: 227228166. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:08,978][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:10,738][626795] Updated weights for policy 0, policy_version 233032 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:12,591][626795] Updated weights for policy 0, policy_version 233042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42599.1, 300 sec: 42570.6). Total num frames: 1909129216. Throughput: 0: 10655.3. Samples: 227260566. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:13,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:14,581][626795] Updated weights for policy 0, policy_version 233052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:16,439][626795] Updated weights for policy 0, policy_version 233062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:18,348][626795] Updated weights for policy 0, policy_version 233072 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:18,976][24592] Fps is (10 sec: 42598.1, 60 sec: 42598.3, 300 sec: 42570.7). Total num frames: 1909350400. Throughput: 0: 10637.2. Samples: 227324202. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:18,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:20,345][626795] Updated weights for policy 0, policy_version 233082 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:22,169][626795] Updated weights for policy 0, policy_version 233092 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:23,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 1909563392. Throughput: 0: 10641.2. Samples: 227388522. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:23,976][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:24,066][626795] Updated weights for policy 0, policy_version 233102 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:26,076][626795] Updated weights for policy 0, policy_version 233112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:27,976][626795] Updated weights for policy 0, policy_version 233122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:28,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 1909776384. Throughput: 0: 10640.8. Samples: 227420526. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:28,976][24592] Avg episode reward: [(0, '4.899')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:30,011][626795] Updated weights for policy 0, policy_version 233132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:31,801][626795] Updated weights for policy 0, policy_version 233142 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:33,794][626795] Updated weights for policy 0, policy_version 233152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:33,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42598.4, 300 sec: 42598.5). Total num frames: 1909989376. Throughput: 0: 10644.9. Samples: 227484348. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:33,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:35,725][626795] Updated weights for policy 0, policy_version 233162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:37,643][626795] Updated weights for policy 0, policy_version 233172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 1910202368. Throughput: 0: 10632.5. Samples: 227548278. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:38,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:39,521][626795] Updated weights for policy 0, policy_version 233182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:41,517][626795] Updated weights for policy 0, policy_version 233192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:43,420][626795] Updated weights for policy 0, policy_version 233202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:43,977][24592] Fps is (10 sec: 41773.1, 60 sec: 42461.0, 300 sec: 42570.4). Total num frames: 1910407168. Throughput: 0: 10632.8. Samples: 227579646. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:43,978][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:45,337][626795] Updated weights for policy 0, policy_version 233212 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:47,184][626795] Updated weights for policy 0, policy_version 233222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.6, 300 sec: 42598.5). Total num frames: 1910628352. Throughput: 0: 10641.8. Samples: 227643912. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:48,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:49,178][626795] Updated weights for policy 0, policy_version 233232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:51,079][626795] Updated weights for policy 0, policy_version 233242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:52,928][626795] Updated weights for policy 0, policy_version 233252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:53,976][24592] Fps is (10 sec: 43423.4, 60 sec: 42598.3, 300 sec: 42598.4). Total num frames: 1910841344. Throughput: 0: 10671.7. Samples: 227708394. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:53,977][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:54,810][626795] Updated weights for policy 0, policy_version 233262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:56,892][626795] Updated weights for policy 0, policy_version 233272 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:13:58,810][626795] Updated weights for policy 0, policy_version 233282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:58,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 1911054336. Throughput: 0: 10649.1. Samples: 227739774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:13:58,977][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:00,658][626795] Updated weights for policy 0, policy_version 233292 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:02,589][626795] Updated weights for policy 0, policy_version 233302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:03,976][24592] Fps is (10 sec: 42598.0, 60 sec: 42598.3, 300 sec: 42626.2). Total num frames: 1911267328. Throughput: 0: 10658.0. Samples: 227803812. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:03,977][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:04,629][626795] Updated weights for policy 0, policy_version 233312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:06,486][626795] Updated weights for policy 0, policy_version 233322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:08,387][626795] Updated weights for policy 0, policy_version 233332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:08,976][24592] Fps is (10 sec: 42595.4, 60 sec: 42598.0, 300 sec: 42626.2). Total num frames: 1911480320. Throughput: 0: 10647.7. Samples: 227867676. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:08,977][24592] Avg episode reward: [(0, '4.915')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:10,389][626795] Updated weights for policy 0, policy_version 233342 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:12,340][626795] Updated weights for policy 0, policy_version 233352 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:13,975][24592] Fps is (10 sec: 42599.6, 60 sec: 42735.0, 300 sec: 42626.2). Total num frames: 1911693312. Throughput: 0: 10632.8. Samples: 227899002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:13,976][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:14,270][626795] Updated weights for policy 0, policy_version 233362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:16,103][626795] Updated weights for policy 0, policy_version 233372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:18,051][626795] Updated weights for policy 0, policy_version 233382 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:18,975][24592] Fps is (10 sec: 41782.1, 60 sec: 42462.0, 300 sec: 42598.4). Total num frames: 1911898112. Throughput: 0: 10643.9. Samples: 227963322. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:18,976][24592] Avg episode reward: [(0, '4.985')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:19,914][626795] Updated weights for policy 0, policy_version 233392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:21,879][626795] Updated weights for policy 0, policy_version 233402 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:23,799][626795] Updated weights for policy 0, policy_version 233412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.4, 300 sec: 42626.2). Total num frames: 1912119296. Throughput: 0: 10654.7. Samples: 228027738. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:23,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:25,531][626795] Updated weights for policy 0, policy_version 233422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:27,562][626795] Updated weights for policy 0, policy_version 233432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:28,976][24592] Fps is (10 sec: 43415.9, 60 sec: 42598.1, 300 sec: 42626.2). Total num frames: 1912332288. Throughput: 0: 10670.3. Samples: 228059796. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:28,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:29,527][626795] Updated weights for policy 0, policy_version 233442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:31,360][626795] Updated weights for policy 0, policy_version 233452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:33,284][626795] Updated weights for policy 0, policy_version 233462 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:33,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.5, 300 sec: 42626.2). Total num frames: 1912545280. Throughput: 0: 10676.8. Samples: 228124368. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:33,977][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:35,272][626795] Updated weights for policy 0, policy_version 233472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:37,168][626795] Updated weights for policy 0, policy_version 233482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:38,975][24592] Fps is (10 sec: 42599.8, 60 sec: 42598.3, 300 sec: 42626.2). Total num frames: 1912758272. Throughput: 0: 10647.5. Samples: 228187530. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:38,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:39,118][626795] Updated weights for policy 0, policy_version 233492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:41,052][626795] Updated weights for policy 0, policy_version 233502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:43,000][626795] Updated weights for policy 0, policy_version 233512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:43,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42736.0, 300 sec: 42626.2). Total num frames: 1912971264. Throughput: 0: 10650.8. Samples: 228219060. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:43,978][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:44,894][626795] Updated weights for policy 0, policy_version 233522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:46,716][626795] Updated weights for policy 0, policy_version 233532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:48,739][626795] Updated weights for policy 0, policy_version 233542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:48,977][24592] Fps is (10 sec: 42592.0, 60 sec: 42597.3, 300 sec: 42625.9). Total num frames: 1913184256. Throughput: 0: 10655.3. Samples: 228283314. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:48,979][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:50,672][626795] Updated weights for policy 0, policy_version 233552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:52,535][626795] Updated weights for policy 0, policy_version 233562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:53,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.5, 300 sec: 42626.2). Total num frames: 1913397248. Throughput: 0: 10665.5. Samples: 228347616. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:53,978][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:54,519][626795] Updated weights for policy 0, policy_version 233572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:56,501][626795] Updated weights for policy 0, policy_version 233582 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:14:58,436][626795] Updated weights for policy 0, policy_version 233592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:58,975][24592] Fps is (10 sec: 42604.9, 60 sec: 42598.4, 300 sec: 42626.2). Total num frames: 1913610240. Throughput: 0: 10659.2. Samples: 228378666. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:14:58,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:00,310][626795] Updated weights for policy 0, policy_version 233602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:02,151][626795] Updated weights for policy 0, policy_version 233612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:03,976][24592] Fps is (10 sec: 42595.9, 60 sec: 42598.1, 300 sec: 42626.1). Total num frames: 1913823232. Throughput: 0: 10661.6. Samples: 228443100. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:03,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000233621_1913823232.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:04,055][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000232371_1903583232.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:04,147][626795] Updated weights for policy 0, policy_version 233622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:05,998][626795] Updated weights for policy 0, policy_version 233632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:07,946][626795] Updated weights for policy 0, policy_version 233642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:08,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42599.0, 300 sec: 42626.2). Total num frames: 1914036224. Throughput: 0: 10639.6. Samples: 228506520. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:08,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:09,890][626795] Updated weights for policy 0, policy_version 233652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:11,871][626795] Updated weights for policy 0, policy_version 233662 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:13,697][626795] Updated weights for policy 0, policy_version 233672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:13,976][24592] Fps is (10 sec: 42598.4, 60 sec: 42597.9, 300 sec: 42653.8). Total num frames: 1914249216. Throughput: 0: 10641.9. Samples: 228538686. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:13,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:15,720][626795] Updated weights for policy 0, policy_version 233682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:17,633][626795] Updated weights for policy 0, policy_version 233692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:18,975][24592] Fps is (10 sec: 42597.4, 60 sec: 42734.9, 300 sec: 42653.9). Total num frames: 1914462208. Throughput: 0: 10626.4. Samples: 228602556. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:18,978][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:19,514][626795] Updated weights for policy 0, policy_version 233702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:21,489][626795] Updated weights for policy 0, policy_version 233712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:23,328][626795] Updated weights for policy 0, policy_version 233722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:23,976][24592] Fps is (10 sec: 42600.6, 60 sec: 42598.3, 300 sec: 42626.2). Total num frames: 1914675200. Throughput: 0: 10644.4. Samples: 228666528. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:23,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:25,298][626795] Updated weights for policy 0, policy_version 233732 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:27,145][626795] Updated weights for policy 0, policy_version 233742 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:28,975][24592] Fps is (10 sec: 41779.7, 60 sec: 42462.2, 300 sec: 42626.2). Total num frames: 1914880000. Throughput: 0: 10645.0. Samples: 228698082. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:28,977][24592] Avg episode reward: [(0, '4.450')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:29,179][626795] Updated weights for policy 0, policy_version 233752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:31,064][626795] Updated weights for policy 0, policy_version 233762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:32,990][626795] Updated weights for policy 0, policy_version 233772 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:33,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42598.4, 300 sec: 42626.2). Total num frames: 1915101184. Throughput: 0: 10633.4. Samples: 228761802. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:33,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:35,008][626795] Updated weights for policy 0, policy_version 233782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:36,855][626795] Updated weights for policy 0, policy_version 233792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:38,820][626795] Updated weights for policy 0, policy_version 233802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:38,976][24592] Fps is (10 sec: 43415.6, 60 sec: 42598.1, 300 sec: 42626.1). Total num frames: 1915314176. Throughput: 0: 10619.1. Samples: 228825480. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:38,978][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:40,765][626795] Updated weights for policy 0, policy_version 233812 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:42,601][626795] Updated weights for policy 0, policy_version 233822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:43,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42461.8, 300 sec: 42598.4). Total num frames: 1915518976. Throughput: 0: 10636.9. Samples: 228857328. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:43,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:44,566][626795] Updated weights for policy 0, policy_version 233832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:46,492][626795] Updated weights for policy 0, policy_version 233842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:48,459][626795] Updated weights for policy 0, policy_version 233852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:48,976][24592] Fps is (10 sec: 40959.7, 60 sec: 42326.1, 300 sec: 42570.6). Total num frames: 1915723776. Throughput: 0: 10615.9. Samples: 228920814. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:48,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:50,551][626795] Updated weights for policy 0, policy_version 233862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:52,366][626795] Updated weights for policy 0, policy_version 233872 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:53,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42461.9, 300 sec: 42626.2). Total num frames: 1915944960. Throughput: 0: 10619.3. Samples: 228984390. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:53,977][24592] Avg episode reward: [(0, '4.387')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:54,266][626795] Updated weights for policy 0, policy_version 233882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:56,233][626795] Updated weights for policy 0, policy_version 233892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:15:58,205][626795] Updated weights for policy 0, policy_version 233902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:58,975][24592] Fps is (10 sec: 42600.6, 60 sec: 42325.4, 300 sec: 42598.4). Total num frames: 1916149760. Throughput: 0: 10590.0. Samples: 229015230. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:15:58,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:00,070][626795] Updated weights for policy 0, policy_version 233912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:01,995][626795] Updated weights for policy 0, policy_version 233922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42325.9, 300 sec: 42626.2). Total num frames: 1916362752. Throughput: 0: 10599.2. Samples: 229079520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:03,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:04,042][626795] Updated weights for policy 0, policy_version 233932 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:05,866][626795] Updated weights for policy 0, policy_version 233942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:07,850][626795] Updated weights for policy 0, policy_version 233952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:08,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42461.8, 300 sec: 42626.2). Total num frames: 1916583936. Throughput: 0: 10597.5. Samples: 229143414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:08,977][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:09,795][626795] Updated weights for policy 0, policy_version 233962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:11,589][626795] Updated weights for policy 0, policy_version 233972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:13,588][626795] Updated weights for policy 0, policy_version 233982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:13,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42462.4, 300 sec: 42654.0). Total num frames: 1916796928. Throughput: 0: 10602.0. Samples: 229175172. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:13,978][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:15,532][626795] Updated weights for policy 0, policy_version 233992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:17,388][626795] Updated weights for policy 0, policy_version 234002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42462.0, 300 sec: 42653.9). Total num frames: 1917009920. Throughput: 0: 10611.9. Samples: 229239336. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:18,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:19,338][626795] Updated weights for policy 0, policy_version 234012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:21,396][626795] Updated weights for policy 0, policy_version 234022 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:23,088][626795] Updated weights for policy 0, policy_version 234032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42462.0, 300 sec: 42626.2). Total num frames: 1917222912. Throughput: 0: 10619.6. Samples: 229303356. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:23,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:25,169][626795] Updated weights for policy 0, policy_version 234042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:27,028][626795] Updated weights for policy 0, policy_version 234052 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:28,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42461.9, 300 sec: 42598.4). Total num frames: 1917427712. Throughput: 0: 10620.6. Samples: 229335252. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:28,976][626795] Updated weights for policy 0, policy_version 234062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:28,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:30,820][626795] Updated weights for policy 0, policy_version 234072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:32,754][626795] Updated weights for policy 0, policy_version 234082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:33,975][24592] Fps is (10 sec: 42597.8, 60 sec: 42461.8, 300 sec: 42626.2). Total num frames: 1917648896. Throughput: 0: 10636.6. Samples: 229399458. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:33,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:34,716][626795] Updated weights for policy 0, policy_version 234092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:36,692][626795] Updated weights for policy 0, policy_version 234102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:38,508][626795] Updated weights for policy 0, policy_version 234112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:38,976][24592] Fps is (10 sec: 43413.7, 60 sec: 42461.6, 300 sec: 42598.3). Total num frames: 1917861888. Throughput: 0: 10641.4. Samples: 229463262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:38,978][24592] Avg episode reward: [(0, '4.878')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:40,414][626795] Updated weights for policy 0, policy_version 234122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:42,369][626795] Updated weights for policy 0, policy_version 234132 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:43,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42598.6, 300 sec: 42626.3). Total num frames: 1918074880. Throughput: 0: 10659.4. Samples: 229494900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:43,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:44,351][626795] Updated weights for policy 0, policy_version 234142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:46,136][626795] Updated weights for policy 0, policy_version 234152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:48,091][626795] Updated weights for policy 0, policy_version 234162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:48,975][24592] Fps is (10 sec: 42602.3, 60 sec: 42735.4, 300 sec: 42598.4). Total num frames: 1918287872. Throughput: 0: 10667.9. Samples: 229559574. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:48,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:50,075][626795] Updated weights for policy 0, policy_version 234172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:51,916][626795] Updated weights for policy 0, policy_version 234182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:53,947][626795] Updated weights for policy 0, policy_version 234192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.5, 300 sec: 42570.6). Total num frames: 1918500864. Throughput: 0: 10669.1. Samples: 229623522. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:53,976][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:55,782][626795] Updated weights for policy 0, policy_version 234202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:57,768][626795] Updated weights for policy 0, policy_version 234212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:58,976][24592] Fps is (10 sec: 43413.6, 60 sec: 42870.9, 300 sec: 42598.3). Total num frames: 1918722048. Throughput: 0: 10669.7. Samples: 229655316. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:16:58,978][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:16:59,562][626795] Updated weights for policy 0, policy_version 234222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:01,545][626795] Updated weights for policy 0, policy_version 234232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:03,449][626795] Updated weights for policy 0, policy_version 234242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:03,977][24592] Fps is (10 sec: 42591.7, 60 sec: 42733.8, 300 sec: 42570.4). Total num frames: 1918926848. Throughput: 0: 10672.0. Samples: 229719594. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:03,980][24592] Avg episode reward: [(0, '4.327')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000234244_1918926848.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:04,072][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000232997_1908711424.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:05,359][626795] Updated weights for policy 0, policy_version 234252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:07,312][626795] Updated weights for policy 0, policy_version 234262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:08,975][24592] Fps is (10 sec: 41783.2, 60 sec: 42598.5, 300 sec: 42598.6). Total num frames: 1919139840. Throughput: 0: 10684.5. Samples: 229784160. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:08,976][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:09,266][626795] Updated weights for policy 0, policy_version 234272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:11,188][626795] Updated weights for policy 0, policy_version 234282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:13,066][626795] Updated weights for policy 0, policy_version 234292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:13,975][24592] Fps is (10 sec: 42605.0, 60 sec: 42598.4, 300 sec: 42570.6). Total num frames: 1919352832. Throughput: 0: 10662.0. Samples: 229815042. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:13,976][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:14,912][626795] Updated weights for policy 0, policy_version 234302 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:16,855][626795] Updated weights for policy 0, policy_version 234312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:18,783][626795] Updated weights for policy 0, policy_version 234322 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:18,976][24592] Fps is (10 sec: 43416.1, 60 sec: 42734.7, 300 sec: 42598.4). Total num frames: 1919574016. Throughput: 0: 10677.7. Samples: 229879956. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:18,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:20,631][626795] Updated weights for policy 0, policy_version 234332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:22,610][626795] Updated weights for policy 0, policy_version 234342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:23,976][24592] Fps is (10 sec: 43416.0, 60 sec: 42734.7, 300 sec: 42598.4). Total num frames: 1919787008. Throughput: 0: 10692.7. Samples: 229944426. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:23,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:24,476][626795] Updated weights for policy 0, policy_version 234352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:26,511][626795] Updated weights for policy 0, policy_version 234362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:28,318][626795] Updated weights for policy 0, policy_version 234372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:28,976][24592] Fps is (10 sec: 42595.1, 60 sec: 42870.7, 300 sec: 42598.3). Total num frames: 1920000000. Throughput: 0: 10686.9. Samples: 229975824. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:28,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:30,192][626795] Updated weights for policy 0, policy_version 234382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:32,110][626795] Updated weights for policy 0, policy_version 234392 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:33,976][24592] Fps is (10 sec: 42598.9, 60 sec: 42734.9, 300 sec: 42598.4). Total num frames: 1920212992. Throughput: 0: 10673.5. Samples: 230039886. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:33,976][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:34,192][626795] Updated weights for policy 0, policy_version 234402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:36,055][626795] Updated weights for policy 0, policy_version 234412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:37,821][626795] Updated weights for policy 0, policy_version 234422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:38,977][24592] Fps is (10 sec: 42594.9, 60 sec: 42734.2, 300 sec: 42598.2). Total num frames: 1920425984. Throughput: 0: 10698.6. Samples: 230104980. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:38,979][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:39,809][626795] Updated weights for policy 0, policy_version 234432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:41,812][626795] Updated weights for policy 0, policy_version 234442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:43,655][626795] Updated weights for policy 0, policy_version 234452 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:43,977][24592] Fps is (10 sec: 42595.3, 60 sec: 42734.2, 300 sec: 42598.3). Total num frames: 1920638976. Throughput: 0: 10692.6. Samples: 230136486. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:43,983][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:45,590][626795] Updated weights for policy 0, policy_version 234462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:47,580][626795] Updated weights for policy 0, policy_version 234472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:48,975][24592] Fps is (10 sec: 42606.6, 60 sec: 42735.0, 300 sec: 42598.4). Total num frames: 1920851968. Throughput: 0: 10676.4. Samples: 230200014. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:48,979][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:49,581][626795] Updated weights for policy 0, policy_version 234482 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:51,428][626795] Updated weights for policy 0, policy_version 234492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:53,343][626795] Updated weights for policy 0, policy_version 234502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:53,976][24592] Fps is (10 sec: 41782.4, 60 sec: 42598.2, 300 sec: 42570.6). Total num frames: 1921056768. Throughput: 0: 10651.8. Samples: 230263494. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:53,978][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:55,364][626795] Updated weights for policy 0, policy_version 234512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:57,237][626795] Updated weights for policy 0, policy_version 234522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:58,975][24592] Fps is (10 sec: 41778.4, 60 sec: 42462.4, 300 sec: 42570.6). Total num frames: 1921269760. Throughput: 0: 10666.6. Samples: 230295042. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:17:58,978][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:17:59,249][626795] Updated weights for policy 0, policy_version 234532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:01,187][626795] Updated weights for policy 0, policy_version 234542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:03,054][626795] Updated weights for policy 0, policy_version 234552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:03,975][24592] Fps is (10 sec: 43418.4, 60 sec: 42736.0, 300 sec: 42598.4). Total num frames: 1921490944. Throughput: 0: 10649.8. Samples: 230359194. Policy #0 lag: (min: 1.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:03,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:05,032][626795] Updated weights for policy 0, policy_version 234562 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:06,901][626795] Updated weights for policy 0, policy_version 234572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:08,884][626795] Updated weights for policy 0, policy_version 234582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:08,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42598.3, 300 sec: 42598.4). Total num frames: 1921695744. Throughput: 0: 10616.2. Samples: 230422152. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:08,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:10,826][626795] Updated weights for policy 0, policy_version 234592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:12,592][626795] Updated weights for policy 0, policy_version 234602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:13,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42598.4, 300 sec: 42570.7). Total num frames: 1921908736. Throughput: 0: 10641.3. Samples: 230454672. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:13,976][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:14,678][626795] Updated weights for policy 0, policy_version 234612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:16,672][626795] Updated weights for policy 0, policy_version 234622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:18,474][626795] Updated weights for policy 0, policy_version 234632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:18,976][24592] Fps is (10 sec: 42597.4, 60 sec: 42461.9, 300 sec: 42570.6). Total num frames: 1922121728. Throughput: 0: 10622.7. Samples: 230517906. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:18,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:20,356][626795] Updated weights for policy 0, policy_version 234642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:22,398][626795] Updated weights for policy 0, policy_version 234652 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:23,975][24592] Fps is (10 sec: 42597.5, 60 sec: 42462.0, 300 sec: 42570.6). Total num frames: 1922334720. Throughput: 0: 10599.1. Samples: 230581920. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:23,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:24,272][626795] Updated weights for policy 0, policy_version 234662 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:26,234][626795] Updated weights for policy 0, policy_version 234672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:28,198][626795] Updated weights for policy 0, policy_version 234682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:28,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42462.6, 300 sec: 42570.6). Total num frames: 1922547712. Throughput: 0: 10580.3. Samples: 230612592. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:28,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:30,169][626795] Updated weights for policy 0, policy_version 234692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:32,016][626795] Updated weights for policy 0, policy_version 234702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:33,976][24592] Fps is (10 sec: 41775.1, 60 sec: 42324.7, 300 sec: 42542.7). Total num frames: 1922752512. Throughput: 0: 10598.4. Samples: 230676954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:33,978][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:33,992][626795] Updated weights for policy 0, policy_version 234712 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:35,935][626795] Updated weights for policy 0, policy_version 234722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:37,893][626795] Updated weights for policy 0, policy_version 234732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:38,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42463.2, 300 sec: 42598.6). Total num frames: 1922973696. Throughput: 0: 10601.5. Samples: 230740560. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:38,979][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:39,694][626795] Updated weights for policy 0, policy_version 234742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:41,782][626795] Updated weights for policy 0, policy_version 234752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:43,518][626795] Updated weights for policy 0, policy_version 234762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:43,976][24592] Fps is (10 sec: 42602.4, 60 sec: 42325.9, 300 sec: 42542.8). Total num frames: 1923178496. Throughput: 0: 10614.0. Samples: 230772672. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:45,528][626795] Updated weights for policy 0, policy_version 234772 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:47,496][626795] Updated weights for policy 0, policy_version 234782 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:48,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42461.8, 300 sec: 42570.7). Total num frames: 1923399680. Throughput: 0: 10603.5. Samples: 230836350. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:48,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:49,410][626795] Updated weights for policy 0, policy_version 234792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:51,249][626795] Updated weights for policy 0, policy_version 234802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:53,197][626795] Updated weights for policy 0, policy_version 234812 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:53,975][24592] Fps is (10 sec: 43418.5, 60 sec: 42598.5, 300 sec: 42570.6). Total num frames: 1923612672. Throughput: 0: 10638.7. Samples: 230900892. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:53,976][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:55,178][626795] Updated weights for policy 0, policy_version 234822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:57,141][626795] Updated weights for policy 0, policy_version 234832 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:58,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42462.0, 300 sec: 42542.9). Total num frames: 1923817472. Throughput: 0: 10607.1. Samples: 230931990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:18:58,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:18:59,014][626795] Updated weights for policy 0, policy_version 234842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:00,975][626795] Updated weights for policy 0, policy_version 234852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:02,798][626795] Updated weights for policy 0, policy_version 234862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:03,975][24592] Fps is (10 sec: 41778.7, 60 sec: 42325.3, 300 sec: 42543.0). Total num frames: 1924030464. Throughput: 0: 10618.7. Samples: 230995746. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:03,978][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000234867_1924030464.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000233621_1913823232.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:04,930][626795] Updated weights for policy 0, policy_version 234872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:06,719][626795] Updated weights for policy 0, policy_version 234882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:08,720][626795] Updated weights for policy 0, policy_version 234892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:08,976][24592] Fps is (10 sec: 42596.7, 60 sec: 42461.6, 300 sec: 42542.8). Total num frames: 1924243456. Throughput: 0: 10606.1. Samples: 231059196. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:08,976][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:10,675][626795] Updated weights for policy 0, policy_version 234902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:12,572][626795] Updated weights for policy 0, policy_version 234912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:13,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42461.8, 300 sec: 42570.6). Total num frames: 1924456448. Throughput: 0: 10624.8. Samples: 231090708. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:13,977][24592] Avg episode reward: [(0, '4.916')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:14,502][626795] Updated weights for policy 0, policy_version 234922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:16,439][626795] Updated weights for policy 0, policy_version 234932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:18,358][626795] Updated weights for policy 0, policy_version 234942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:18,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42462.0, 300 sec: 42542.9). Total num frames: 1924669440. Throughput: 0: 10619.2. Samples: 231154806. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:18,977][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:20,308][626795] Updated weights for policy 0, policy_version 234952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:22,098][626795] Updated weights for policy 0, policy_version 234962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42462.0, 300 sec: 42542.9). Total num frames: 1924882432. Throughput: 0: 10635.5. Samples: 231219156. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:23,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:24,039][626795] Updated weights for policy 0, policy_version 234972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:25,967][626795] Updated weights for policy 0, policy_version 234982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:27,969][626795] Updated weights for policy 0, policy_version 234992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:28,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42462.0, 300 sec: 42542.9). Total num frames: 1925095424. Throughput: 0: 10617.0. Samples: 231250434. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:28,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:29,866][626795] Updated weights for policy 0, policy_version 235002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:31,819][626795] Updated weights for policy 0, policy_version 235012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:31,840][626772] Signal inference workers to stop experience collection... (3100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:31,845][626772] Signal inference workers to resume experience collection... (3100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:31,850][626795] InferenceWorker_p0-w0: stopping experience collection (3100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:31,859][626795] InferenceWorker_p0-w0: resuming experience collection (3100 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:33,819][626795] Updated weights for policy 0, policy_version 235022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:33,976][24592] Fps is (10 sec: 41777.8, 60 sec: 42462.4, 300 sec: 42515.1). Total num frames: 1925300224. Throughput: 0: 10619.5. Samples: 231314232. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:33,978][24592] Avg episode reward: [(0, '4.903')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:35,643][626795] Updated weights for policy 0, policy_version 235032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:37,630][626795] Updated weights for policy 0, policy_version 235042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:38,975][24592] Fps is (10 sec: 42597.6, 60 sec: 42461.8, 300 sec: 42542.9). Total num frames: 1925521408. Throughput: 0: 10601.0. Samples: 231377940. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:38,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:39,473][626795] Updated weights for policy 0, policy_version 235052 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:41,448][626795] Updated weights for policy 0, policy_version 235062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:43,416][626795] Updated weights for policy 0, policy_version 235072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:43,976][24592] Fps is (10 sec: 42599.0, 60 sec: 42461.8, 300 sec: 42515.3). Total num frames: 1925726208. Throughput: 0: 10619.8. Samples: 231409884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:43,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:45,214][626795] Updated weights for policy 0, policy_version 235082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:47,192][626795] Updated weights for policy 0, policy_version 235092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:48,976][24592] Fps is (10 sec: 42598.0, 60 sec: 42461.8, 300 sec: 42542.8). Total num frames: 1925947392. Throughput: 0: 10618.4. Samples: 231473574. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:48,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:49,238][626795] Updated weights for policy 0, policy_version 235102 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:51,054][626795] Updated weights for policy 0, policy_version 235112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:52,980][626795] Updated weights for policy 0, policy_version 235122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:53,975][24592] Fps is (10 sec: 43418.5, 60 sec: 42461.9, 300 sec: 42542.9). Total num frames: 1926160384. Throughput: 0: 10628.4. Samples: 231537468. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:53,977][24592] Avg episode reward: [(0, '4.969')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:54,959][626795] Updated weights for policy 0, policy_version 235132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:56,842][626795] Updated weights for policy 0, policy_version 235142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:19:58,711][626795] Updated weights for policy 0, policy_version 235152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:58,976][24592] Fps is (10 sec: 42597.9, 60 sec: 42598.2, 300 sec: 42542.9). Total num frames: 1926373376. Throughput: 0: 10641.8. Samples: 231569592. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:19:58,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:00,708][626795] Updated weights for policy 0, policy_version 235162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:02,670][626795] Updated weights for policy 0, policy_version 235172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:03,976][24592] Fps is (10 sec: 41777.8, 60 sec: 42461.7, 300 sec: 42515.0). Total num frames: 1926578176. Throughput: 0: 10619.0. Samples: 231632664. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:03,976][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:04,648][626795] Updated weights for policy 0, policy_version 235182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:06,459][626795] Updated weights for policy 0, policy_version 235192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:08,467][626795] Updated weights for policy 0, policy_version 235202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:08,975][24592] Fps is (10 sec: 41780.6, 60 sec: 42462.1, 300 sec: 42515.2). Total num frames: 1926791168. Throughput: 0: 10616.4. Samples: 231696894. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:08,976][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:10,387][626795] Updated weights for policy 0, policy_version 235212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:12,238][626795] Updated weights for policy 0, policy_version 235222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:13,976][24592] Fps is (10 sec: 42598.1, 60 sec: 42461.6, 300 sec: 42515.1). Total num frames: 1927004160. Throughput: 0: 10631.4. Samples: 231728850. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:13,979][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:14,215][626795] Updated weights for policy 0, policy_version 235232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:16,200][626795] Updated weights for policy 0, policy_version 235242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:17,998][626795] Updated weights for policy 0, policy_version 235252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:18,976][24592] Fps is (10 sec: 42596.7, 60 sec: 42461.6, 300 sec: 42515.1). Total num frames: 1927217152. Throughput: 0: 10624.7. Samples: 231792342. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:18,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:19,950][626795] Updated weights for policy 0, policy_version 235262 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:21,851][626795] Updated weights for policy 0, policy_version 235272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:23,825][626795] Updated weights for policy 0, policy_version 235282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:23,975][24592] Fps is (10 sec: 42600.1, 60 sec: 42461.9, 300 sec: 42542.9). Total num frames: 1927430144. Throughput: 0: 10635.4. Samples: 231856530. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:23,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:25,705][626795] Updated weights for policy 0, policy_version 235292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:27,688][626795] Updated weights for policy 0, policy_version 235302 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:28,975][24592] Fps is (10 sec: 43419.1, 60 sec: 42598.3, 300 sec: 42542.9). Total num frames: 1927651328. Throughput: 0: 10640.2. Samples: 231888690. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:28,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:29,564][626795] Updated weights for policy 0, policy_version 235312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:31,529][626795] Updated weights for policy 0, policy_version 235322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:33,442][626795] Updated weights for policy 0, policy_version 235332 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:33,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.6, 300 sec: 42515.2). Total num frames: 1927856128. Throughput: 0: 10621.2. Samples: 231951528. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:33,976][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:35,419][626795] Updated weights for policy 0, policy_version 235342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:37,341][626795] Updated weights for policy 0, policy_version 235352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42461.9, 300 sec: 42542.9). Total num frames: 1928069120. Throughput: 0: 10624.0. Samples: 232015548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:38,977][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:39,369][626795] Updated weights for policy 0, policy_version 235362 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:41,162][626795] Updated weights for policy 0, policy_version 235372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:43,149][626795] Updated weights for policy 0, policy_version 235382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:43,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.6, 300 sec: 42570.7). Total num frames: 1928282112. Throughput: 0: 10622.5. Samples: 232047600. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:43,976][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:45,129][626795] Updated weights for policy 0, policy_version 235392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:47,094][626795] Updated weights for policy 0, policy_version 235402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:48,928][626795] Updated weights for policy 0, policy_version 235412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:48,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42461.9, 300 sec: 42542.8). Total num frames: 1928495104. Throughput: 0: 10623.1. Samples: 232110702. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:48,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:50,807][626795] Updated weights for policy 0, policy_version 235422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:52,791][626795] Updated weights for policy 0, policy_version 235432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:53,976][24592] Fps is (10 sec: 42596.4, 60 sec: 42461.5, 300 sec: 42570.6). Total num frames: 1928708096. Throughput: 0: 10615.5. Samples: 232174596. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:53,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:54,711][626795] Updated weights for policy 0, policy_version 235442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:56,639][626795] Updated weights for policy 0, policy_version 235452 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:20:58,508][626795] Updated weights for policy 0, policy_version 235462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:58,976][24592] Fps is (10 sec: 42594.9, 60 sec: 42461.4, 300 sec: 42570.5). Total num frames: 1928921088. Throughput: 0: 10617.3. Samples: 232206636. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:20:58,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:00,487][626795] Updated weights for policy 0, policy_version 235472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:02,382][626795] Updated weights for policy 0, policy_version 235482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:03,975][24592] Fps is (10 sec: 42600.2, 60 sec: 42598.6, 300 sec: 42542.9). Total num frames: 1929134080. Throughput: 0: 10637.0. Samples: 232271004. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:03,978][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000235490_1929134080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:04,051][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000234244_1918926848.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:04,315][626795] Updated weights for policy 0, policy_version 235492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:06,350][626795] Updated weights for policy 0, policy_version 235502 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:08,215][626795] Updated weights for policy 0, policy_version 235512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:08,975][24592] Fps is (10 sec: 41783.4, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1929338880. Throughput: 0: 10606.0. Samples: 232333800. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:08,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:10,186][626795] Updated weights for policy 0, policy_version 235522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:12,068][626795] Updated weights for policy 0, policy_version 235532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42462.1, 300 sec: 42515.1). Total num frames: 1929551872. Throughput: 0: 10601.7. Samples: 232365768. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:13,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:14,001][626795] Updated weights for policy 0, policy_version 235542 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:15,918][626795] Updated weights for policy 0, policy_version 235552 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:17,880][626795] Updated weights for policy 0, policy_version 235562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:18,976][24592] Fps is (10 sec: 43417.6, 60 sec: 42598.7, 300 sec: 42542.9). Total num frames: 1929773056. Throughput: 0: 10623.3. Samples: 232429578. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:18,977][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:19,772][626795] Updated weights for policy 0, policy_version 235572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:21,807][626795] Updated weights for policy 0, policy_version 235582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:23,647][626795] Updated weights for policy 0, policy_version 235592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:23,975][24592] Fps is (10 sec: 42597.8, 60 sec: 42461.8, 300 sec: 42542.8). Total num frames: 1929977856. Throughput: 0: 10631.3. Samples: 232493958. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:23,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:25,516][626795] Updated weights for policy 0, policy_version 235602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:27,382][626795] Updated weights for policy 0, policy_version 235612 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:28,976][24592] Fps is (10 sec: 42596.3, 60 sec: 42461.6, 300 sec: 42542.8). Total num frames: 1930199040. Throughput: 0: 10626.3. Samples: 232525788. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:28,977][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:29,377][626795] Updated weights for policy 0, policy_version 235622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:31,272][626795] Updated weights for policy 0, policy_version 235632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:33,219][626795] Updated weights for policy 0, policy_version 235642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:33,975][24592] Fps is (10 sec: 43418.2, 60 sec: 42598.4, 300 sec: 42543.0). Total num frames: 1930412032. Throughput: 0: 10651.2. Samples: 232590006. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:33,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:35,090][626795] Updated weights for policy 0, policy_version 235652 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:37,029][626795] Updated weights for policy 0, policy_version 235662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:38,934][626795] Updated weights for policy 0, policy_version 235672 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:38,975][24592] Fps is (10 sec: 42600.4, 60 sec: 42598.4, 300 sec: 42542.8). Total num frames: 1930625024. Throughput: 0: 10644.9. Samples: 232653612. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:38,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:40,899][626795] Updated weights for policy 0, policy_version 235682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:42,894][626795] Updated weights for policy 0, policy_version 235692 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:43,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42598.4, 300 sec: 42542.9). Total num frames: 1930838016. Throughput: 0: 10643.8. Samples: 232685598. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:43,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:44,643][626795] Updated weights for policy 0, policy_version 235702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:46,768][626795] Updated weights for policy 0, policy_version 235712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:48,643][626795] Updated weights for policy 0, policy_version 235722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:48,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1931042816. Throughput: 0: 10619.6. Samples: 232748886. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:48,977][24592] Avg episode reward: [(0, '4.967')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:50,617][626795] Updated weights for policy 0, policy_version 235732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:52,470][626795] Updated weights for policy 0, policy_version 235742 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:53,976][24592] Fps is (10 sec: 42595.9, 60 sec: 42598.3, 300 sec: 42515.1). Total num frames: 1931264000. Throughput: 0: 10645.6. Samples: 232812858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:53,978][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:54,494][626795] Updated weights for policy 0, policy_version 235752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:56,382][626795] Updated weights for policy 0, policy_version 235762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:21:58,209][626795] Updated weights for policy 0, policy_version 235772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:58,976][24592] Fps is (10 sec: 42598.3, 60 sec: 42462.4, 300 sec: 42515.3). Total num frames: 1931468800. Throughput: 0: 10638.2. Samples: 232844490. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:21:58,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:00,141][626795] Updated weights for policy 0, policy_version 235782 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:02,057][626795] Updated weights for policy 0, policy_version 235792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:03,975][24592] Fps is (10 sec: 41781.5, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1931681792. Throughput: 0: 10653.3. Samples: 232908978. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:03,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:04,084][626795] Updated weights for policy 0, policy_version 235802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:05,861][626795] Updated weights for policy 0, policy_version 235812 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:07,826][626795] Updated weights for policy 0, policy_version 235822 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:08,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42598.4, 300 sec: 42515.1). Total num frames: 1931894784. Throughput: 0: 10633.4. Samples: 232972458. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:08,977][24592] Avg episode reward: [(0, '5.034')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:09,806][626795] Updated weights for policy 0, policy_version 235832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:11,844][626795] Updated weights for policy 0, policy_version 235842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:13,568][626795] Updated weights for policy 0, policy_version 235852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:13,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42598.3, 300 sec: 42487.3). Total num frames: 1932107776. Throughput: 0: 10640.8. Samples: 233004618. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:13,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:15,605][626795] Updated weights for policy 0, policy_version 235862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:17,505][626795] Updated weights for policy 0, policy_version 235872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:18,977][24592] Fps is (10 sec: 42596.8, 60 sec: 42461.6, 300 sec: 42487.3). Total num frames: 1932320768. Throughput: 0: 10640.4. Samples: 233068830. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:18,979][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:19,438][626795] Updated weights for policy 0, policy_version 235882 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:21,357][626795] Updated weights for policy 0, policy_version 235892 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:23,205][626795] Updated weights for policy 0, policy_version 235902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:23,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42598.5, 300 sec: 42487.5). Total num frames: 1932533760. Throughput: 0: 10633.5. Samples: 233132118. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:23,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:25,256][626795] Updated weights for policy 0, policy_version 235912 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:26,979][626795] Updated weights for policy 0, policy_version 235922 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:28,975][24592] Fps is (10 sec: 42599.9, 60 sec: 42462.2, 300 sec: 42487.3). Total num frames: 1932746752. Throughput: 0: 10633.7. Samples: 233164116. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:28,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:28,995][626795] Updated weights for policy 0, policy_version 235932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:30,934][626795] Updated weights for policy 0, policy_version 235942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:32,946][626795] Updated weights for policy 0, policy_version 235952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:33,976][24592] Fps is (10 sec: 43415.1, 60 sec: 42598.0, 300 sec: 42515.3). Total num frames: 1932967936. Throughput: 0: 10644.7. Samples: 233227902. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:33,978][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:34,789][626795] Updated weights for policy 0, policy_version 235962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:36,712][626795] Updated weights for policy 0, policy_version 235972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:38,640][626795] Updated weights for policy 0, policy_version 235982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:38,976][24592] Fps is (10 sec: 42597.4, 60 sec: 42461.7, 300 sec: 42487.4). Total num frames: 1933172736. Throughput: 0: 10645.1. Samples: 233291886. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:38,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:40,646][626795] Updated weights for policy 0, policy_version 235992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:42,581][626795] Updated weights for policy 0, policy_version 236002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:43,975][24592] Fps is (10 sec: 41781.6, 60 sec: 42461.8, 300 sec: 42487.3). Total num frames: 1933385728. Throughput: 0: 10631.9. Samples: 233322924. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:43,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:44,588][626795] Updated weights for policy 0, policy_version 236012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:46,432][626795] Updated weights for policy 0, policy_version 236022 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:48,310][626795] Updated weights for policy 0, policy_version 236032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:48,976][24592] Fps is (10 sec: 42598.8, 60 sec: 42598.4, 300 sec: 42515.1). Total num frames: 1933598720. Throughput: 0: 10620.6. Samples: 233386908. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:48,978][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:50,277][626795] Updated weights for policy 0, policy_version 236042 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:52,157][626795] Updated weights for policy 0, policy_version 236052 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:53,976][24592] Fps is (10 sec: 42597.7, 60 sec: 42462.1, 300 sec: 42515.1). Total num frames: 1933811712. Throughput: 0: 10630.2. Samples: 233450820. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:53,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:54,173][626795] Updated weights for policy 0, policy_version 236062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:56,016][626795] Updated weights for policy 0, policy_version 236072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:58,023][626795] Updated weights for policy 0, policy_version 236082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:58,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42598.5, 300 sec: 42487.3). Total num frames: 1934024704. Throughput: 0: 10608.3. Samples: 233481990. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:22:58,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:22:59,881][626795] Updated weights for policy 0, policy_version 236092 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:01,865][626795] Updated weights for policy 0, policy_version 236102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:03,708][626795] Updated weights for policy 0, policy_version 236112 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:03,976][24592] Fps is (10 sec: 42597.9, 60 sec: 42598.2, 300 sec: 42515.0). Total num frames: 1934237696. Throughput: 0: 10618.4. Samples: 233546658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:03,978][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000236113_1934237696.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:04,058][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000234867_1924030464.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:05,826][626795] Updated weights for policy 0, policy_version 236122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:07,620][626795] Updated weights for policy 0, policy_version 236132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42515.1). Total num frames: 1934450688. Throughput: 0: 10614.4. Samples: 233609766. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:08,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:09,653][626795] Updated weights for policy 0, policy_version 236142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:11,528][626795] Updated weights for policy 0, policy_version 236152 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:13,482][626795] Updated weights for policy 0, policy_version 236162 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:13,975][24592] Fps is (10 sec: 42599.3, 60 sec: 42598.4, 300 sec: 42515.1). Total num frames: 1934663680. Throughput: 0: 10613.3. Samples: 233641716. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:13,976][24592] Avg episode reward: [(0, '5.059')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:15,355][626795] Updated weights for policy 0, policy_version 236172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:17,355][626795] Updated weights for policy 0, policy_version 236182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42462.1, 300 sec: 42487.3). Total num frames: 1934868480. Throughput: 0: 10594.7. Samples: 233704656. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:18,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:19,261][626795] Updated weights for policy 0, policy_version 236192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:21,140][626795] Updated weights for policy 0, policy_version 236202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:23,171][626795] Updated weights for policy 0, policy_version 236212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:23,976][24592] Fps is (10 sec: 40959.8, 60 sec: 42325.2, 300 sec: 42459.5). Total num frames: 1935073280. Throughput: 0: 10585.2. Samples: 233768220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:23,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:25,015][626795] Updated weights for policy 0, policy_version 236222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:26,972][626795] Updated weights for policy 0, policy_version 236232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:28,929][626795] Updated weights for policy 0, policy_version 236242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:28,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42461.9, 300 sec: 42515.3). Total num frames: 1935294464. Throughput: 0: 10607.1. Samples: 233800242. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:28,978][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:30,854][626795] Updated weights for policy 0, policy_version 236252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:32,604][626795] Updated weights for policy 0, policy_version 236262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:33,975][24592] Fps is (10 sec: 43418.2, 60 sec: 42325.8, 300 sec: 42487.3). Total num frames: 1935507456. Throughput: 0: 10622.4. Samples: 233864916. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:33,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:34,717][626795] Updated weights for policy 0, policy_version 236272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:34,737][626772] Signal inference workers to stop experience collection... (3150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:34,737][626772] Signal inference workers to resume experience collection... (3150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:34,745][626795] InferenceWorker_p0-w0: stopping experience collection (3150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:34,752][626795] InferenceWorker_p0-w0: resuming experience collection (3150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:36,560][626795] Updated weights for policy 0, policy_version 236282 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:38,517][626795] Updated weights for policy 0, policy_version 236292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:38,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42461.8, 300 sec: 42515.1). Total num frames: 1935720448. Throughput: 0: 10605.8. Samples: 233928084. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:38,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:40,470][626795] Updated weights for policy 0, policy_version 236302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:42,425][626795] Updated weights for policy 0, policy_version 236312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42325.3, 300 sec: 42459.6). Total num frames: 1935925248. Throughput: 0: 10622.4. Samples: 233959998. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:43,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:44,238][626795] Updated weights for policy 0, policy_version 236322 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:46,253][626795] Updated weights for policy 0, policy_version 236332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:48,226][626795] Updated weights for policy 0, policy_version 236342 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:48,975][24592] Fps is (10 sec: 42600.0, 60 sec: 42462.0, 300 sec: 42487.3). Total num frames: 1936146432. Throughput: 0: 10590.9. Samples: 234023244. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:48,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:50,148][626795] Updated weights for policy 0, policy_version 236352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:52,083][626795] Updated weights for policy 0, policy_version 236362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:53,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1936351232. Throughput: 0: 10603.1. Samples: 234086904. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:53,976][24592] Avg episode reward: [(0, '5.012')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:54,024][626795] Updated weights for policy 0, policy_version 236372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:56,031][626795] Updated weights for policy 0, policy_version 236382 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:57,893][626795] Updated weights for policy 0, policy_version 236392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1936564224. Throughput: 0: 10573.5. Samples: 234117522. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:23:58,978][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:23:59,927][626795] Updated weights for policy 0, policy_version 236402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:01,775][626795] Updated weights for policy 0, policy_version 236412 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:03,696][626795] Updated weights for policy 0, policy_version 236422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:03,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42325.6, 300 sec: 42487.4). Total num frames: 1936777216. Throughput: 0: 10607.1. Samples: 234181974. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:03,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:05,592][626795] Updated weights for policy 0, policy_version 236432 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:07,504][626795] Updated weights for policy 0, policy_version 236442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1936990208. Throughput: 0: 10603.0. Samples: 234245352. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:08,977][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:09,471][626795] Updated weights for policy 0, policy_version 236452 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:11,391][626795] Updated weights for policy 0, policy_version 236462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:13,421][626795] Updated weights for policy 0, policy_version 236472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:13,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42487.3). Total num frames: 1937203200. Throughput: 0: 10609.2. Samples: 234277656. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:13,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:15,196][626795] Updated weights for policy 0, policy_version 236482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:17,169][626795] Updated weights for policy 0, policy_version 236492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:18,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42461.9, 300 sec: 42487.3). Total num frames: 1937416192. Throughput: 0: 10576.5. Samples: 234340860. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:18,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:19,163][626795] Updated weights for policy 0, policy_version 236502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:20,998][626795] Updated weights for policy 0, policy_version 236512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:23,003][626795] Updated weights for policy 0, policy_version 236522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42598.5, 300 sec: 42487.3). Total num frames: 1937629184. Throughput: 0: 10587.6. Samples: 234404520. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:23,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:25,012][626795] Updated weights for policy 0, policy_version 236532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:26,927][626795] Updated weights for policy 0, policy_version 236542 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:28,700][626795] Updated weights for policy 0, policy_version 236552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:28,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42461.9, 300 sec: 42515.1). Total num frames: 1937842176. Throughput: 0: 10577.2. Samples: 234435972. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:28,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:30,793][626795] Updated weights for policy 0, policy_version 236562 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:32,775][626795] Updated weights for policy 0, policy_version 236572 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42325.3, 300 sec: 42459.6). Total num frames: 1938046976. Throughput: 0: 10584.9. Samples: 234499566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:33,977][24592] Avg episode reward: [(0, '5.022')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:34,716][626795] Updated weights for policy 0, policy_version 236582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:36,649][626795] Updated weights for policy 0, policy_version 236592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:38,551][626795] Updated weights for policy 0, policy_version 236602 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:38,975][24592] Fps is (10 sec: 41778.8, 60 sec: 42325.5, 300 sec: 42487.3). Total num frames: 1938259968. Throughput: 0: 10578.0. Samples: 234562914. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:38,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:40,561][626795] Updated weights for policy 0, policy_version 236612 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:42,337][626795] Updated weights for policy 0, policy_version 236622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42461.9, 300 sec: 42459.6). Total num frames: 1938472960. Throughput: 0: 10599.9. Samples: 234594516. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:43,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:44,344][626795] Updated weights for policy 0, policy_version 236632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:46,243][626795] Updated weights for policy 0, policy_version 236642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:48,167][626795] Updated weights for policy 0, policy_version 236652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:48,976][24592] Fps is (10 sec: 41779.0, 60 sec: 42188.7, 300 sec: 42431.8). Total num frames: 1938677760. Throughput: 0: 10595.4. Samples: 234658770. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:48,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:50,097][626795] Updated weights for policy 0, policy_version 236662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:52,049][626795] Updated weights for policy 0, policy_version 236672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:53,968][626795] Updated weights for policy 0, policy_version 236682 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:53,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42461.9, 300 sec: 42459.6). Total num frames: 1938898944. Throughput: 0: 10595.2. Samples: 234722136. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:53,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:55,914][626795] Updated weights for policy 0, policy_version 236692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:57,822][626795] Updated weights for policy 0, policy_version 236702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:58,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42325.0, 300 sec: 42459.5). Total num frames: 1939103744. Throughput: 0: 10580.3. Samples: 234753774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:24:58,977][24592] Avg episode reward: [(0, '4.995')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:24:59,806][626795] Updated weights for policy 0, policy_version 236712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:01,867][626795] Updated weights for policy 0, policy_version 236722 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:03,724][626795] Updated weights for policy 0, policy_version 236732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:03,976][24592] Fps is (10 sec: 41777.8, 60 sec: 42325.1, 300 sec: 42459.5). Total num frames: 1939316736. Throughput: 0: 10560.6. Samples: 234816090. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:03,976][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000236733_1939316736.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:04,056][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000235490_1929134080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:05,671][626795] Updated weights for policy 0, policy_version 236742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:07,687][626795] Updated weights for policy 0, policy_version 236752 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:08,975][24592] Fps is (10 sec: 42600.2, 60 sec: 42325.4, 300 sec: 42459.6). Total num frames: 1939529728. Throughput: 0: 10558.9. Samples: 234879672. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:08,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:09,549][626795] Updated weights for policy 0, policy_version 236762 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:11,551][626795] Updated weights for policy 0, policy_version 236772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:13,444][626795] Updated weights for policy 0, policy_version 236782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:13,975][24592] Fps is (10 sec: 41780.5, 60 sec: 42188.8, 300 sec: 42431.8). Total num frames: 1939734528. Throughput: 0: 10558.7. Samples: 234911112. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:13,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:15,397][626795] Updated weights for policy 0, policy_version 236792 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:17,286][626795] Updated weights for policy 0, policy_version 236802 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42188.8, 300 sec: 42431.8). Total num frames: 1939947520. Throughput: 0: 10562.9. Samples: 234974898. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:18,978][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:19,242][626795] Updated weights for policy 0, policy_version 236812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:21,197][626795] Updated weights for policy 0, policy_version 236822 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:23,069][626795] Updated weights for policy 0, policy_version 236832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:23,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42188.7, 300 sec: 42404.0). Total num frames: 1940160512. Throughput: 0: 10569.2. Samples: 235038528. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:23,979][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:25,122][626795] Updated weights for policy 0, policy_version 236842 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:27,009][626795] Updated weights for policy 0, policy_version 236852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:28,912][626795] Updated weights for policy 0, policy_version 236862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:28,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42188.7, 300 sec: 42431.8). Total num frames: 1940373504. Throughput: 0: 10564.9. Samples: 235069938. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:28,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:30,858][626795] Updated weights for policy 0, policy_version 236872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:32,820][626795] Updated weights for policy 0, policy_version 236882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:33,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42325.3, 300 sec: 42431.8). Total num frames: 1940586496. Throughput: 0: 10554.1. Samples: 235133706. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:33,977][24592] Avg episode reward: [(0, '4.978')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:34,734][626795] Updated weights for policy 0, policy_version 236892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:36,633][626795] Updated weights for policy 0, policy_version 236902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:38,634][626795] Updated weights for policy 0, policy_version 236912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:38,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42325.4, 300 sec: 42431.8). Total num frames: 1940799488. Throughput: 0: 10560.5. Samples: 235197360. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:38,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:40,526][626795] Updated weights for policy 0, policy_version 236922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:42,516][626795] Updated weights for policy 0, policy_version 236932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:43,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42188.8, 300 sec: 42404.0). Total num frames: 1941004288. Throughput: 0: 10547.0. Samples: 235228386. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:43,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:44,441][626795] Updated weights for policy 0, policy_version 236942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:46,328][626795] Updated weights for policy 0, policy_version 236952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:48,238][626795] Updated weights for policy 0, policy_version 236962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:48,976][24592] Fps is (10 sec: 41778.1, 60 sec: 42325.2, 300 sec: 42404.0). Total num frames: 1941217280. Throughput: 0: 10592.5. Samples: 235292754. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:48,978][24592] Avg episode reward: [(0, '5.026')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:50,260][626795] Updated weights for policy 0, policy_version 236972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:52,277][626795] Updated weights for policy 0, policy_version 236982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42188.8, 300 sec: 42404.2). Total num frames: 1941430272. Throughput: 0: 10555.5. Samples: 235354668. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:53,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:54,251][626795] Updated weights for policy 0, policy_version 236992 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:56,189][626795] Updated weights for policy 0, policy_version 237002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:25:58,127][626795] Updated weights for policy 0, policy_version 237012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:58,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42188.8, 300 sec: 42376.2). Total num frames: 1941635072. Throughput: 0: 10544.7. Samples: 235385628. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:25:58,979][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:00,039][626795] Updated weights for policy 0, policy_version 237022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:01,995][626795] Updated weights for policy 0, policy_version 237032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 42052.5, 300 sec: 42376.2). Total num frames: 1941839872. Throughput: 0: 10532.0. Samples: 235448838. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:03,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:04,019][626795] Updated weights for policy 0, policy_version 237042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:05,923][626795] Updated weights for policy 0, policy_version 237052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:07,887][626795] Updated weights for policy 0, policy_version 237062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:08,976][24592] Fps is (10 sec: 42599.7, 60 sec: 42188.7, 300 sec: 42404.0). Total num frames: 1942061056. Throughput: 0: 10527.6. Samples: 235512270. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:08,977][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:09,750][626795] Updated weights for policy 0, policy_version 237072 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:11,732][626795] Updated weights for policy 0, policy_version 237082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:13,684][626795] Updated weights for policy 0, policy_version 237092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:13,976][24592] Fps is (10 sec: 42596.5, 60 sec: 42188.5, 300 sec: 42348.4). Total num frames: 1942265856. Throughput: 0: 10531.2. Samples: 235543848. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:13,977][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:15,605][626795] Updated weights for policy 0, policy_version 237102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:17,531][626795] Updated weights for policy 0, policy_version 237112 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:18,976][24592] Fps is (10 sec: 41777.9, 60 sec: 42188.5, 300 sec: 42376.2). Total num frames: 1942478848. Throughput: 0: 10521.5. Samples: 235607178. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:18,981][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:19,541][626795] Updated weights for policy 0, policy_version 237122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:21,318][626795] Updated weights for policy 0, policy_version 237132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:23,301][626795] Updated weights for policy 0, policy_version 237142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:23,976][24592] Fps is (10 sec: 42598.9, 60 sec: 42188.6, 300 sec: 42348.5). Total num frames: 1942691840. Throughput: 0: 10533.8. Samples: 235671384. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:23,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:25,301][626795] Updated weights for policy 0, policy_version 237152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:27,276][626795] Updated weights for policy 0, policy_version 237162 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:28,975][24592] Fps is (10 sec: 41781.0, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 1942896640. Throughput: 0: 10519.5. Samples: 235701762. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:28,978][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:29,208][626795] Updated weights for policy 0, policy_version 237172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:31,189][626795] Updated weights for policy 0, policy_version 237182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:33,021][626795] Updated weights for policy 0, policy_version 237192 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:33,975][24592] Fps is (10 sec: 41780.9, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 1943109632. Throughput: 0: 10502.2. Samples: 235765350. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:33,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:35,040][626795] Updated weights for policy 0, policy_version 237202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:36,990][626795] Updated weights for policy 0, policy_version 237212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:38,956][626795] Updated weights for policy 0, policy_version 237222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 1943322624. Throughput: 0: 10536.7. Samples: 235828818. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:38,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:40,845][626795] Updated weights for policy 0, policy_version 237232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:42,780][626795] Updated weights for policy 0, policy_version 237242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:43,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42188.8, 300 sec: 42348.5). Total num frames: 1943535616. Throughput: 0: 10549.7. Samples: 235860360. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:43,977][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:44,742][626795] Updated weights for policy 0, policy_version 237252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:46,664][626795] Updated weights for policy 0, policy_version 237262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:48,557][626795] Updated weights for policy 0, policy_version 237272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:48,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42189.0, 300 sec: 42320.8). Total num frames: 1943748608. Throughput: 0: 10557.5. Samples: 235923924. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:48,977][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:50,545][626795] Updated weights for policy 0, policy_version 237282 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:52,308][626795] Updated weights for policy 0, policy_version 237292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:53,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42188.8, 300 sec: 42348.5). Total num frames: 1943961600. Throughput: 0: 10570.0. Samples: 235987920. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:53,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:54,357][626795] Updated weights for policy 0, policy_version 237302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:56,328][626795] Updated weights for policy 0, policy_version 237312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:26:58,260][626795] Updated weights for policy 0, policy_version 237322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:58,975][24592] Fps is (10 sec: 41778.9, 60 sec: 42189.1, 300 sec: 42320.7). Total num frames: 1944166400. Throughput: 0: 10566.1. Samples: 236019318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:26:58,979][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:00,239][626795] Updated weights for policy 0, policy_version 237332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:02,256][626795] Updated weights for policy 0, policy_version 237342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42325.4, 300 sec: 42320.7). Total num frames: 1944379392. Throughput: 0: 10551.7. Samples: 236082000. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:03,976][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:04,020][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000237352_1944387584.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:04,021][626795] Updated weights for policy 0, policy_version 237352 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:04,085][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000236113_1934237696.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:06,101][626795] Updated weights for policy 0, policy_version 237362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:08,006][626795] Updated weights for policy 0, policy_version 237372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:08,976][24592] Fps is (10 sec: 41777.8, 60 sec: 42052.1, 300 sec: 42292.9). Total num frames: 1944584192. Throughput: 0: 10525.9. Samples: 236145048. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:08,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:10,076][626795] Updated weights for policy 0, policy_version 237382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:11,951][626795] Updated weights for policy 0, policy_version 237392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:13,862][626795] Updated weights for policy 0, policy_version 237402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42189.1, 300 sec: 42293.0). Total num frames: 1944797184. Throughput: 0: 10538.3. Samples: 236175984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:13,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:15,878][626795] Updated weights for policy 0, policy_version 237412 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:17,661][626795] Updated weights for policy 0, policy_version 237422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:18,976][24592] Fps is (10 sec: 42598.2, 60 sec: 42188.8, 300 sec: 42292.9). Total num frames: 1945010176. Throughput: 0: 10558.3. Samples: 236240478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:18,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:19,731][626795] Updated weights for policy 0, policy_version 237432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:21,555][626795] Updated weights for policy 0, policy_version 237442 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:23,545][626795] Updated weights for policy 0, policy_version 237452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:23,975][24592] Fps is (10 sec: 42597.9, 60 sec: 42189.0, 300 sec: 42292.9). Total num frames: 1945223168. Throughput: 0: 10564.8. Samples: 236304234. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:23,977][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:25,384][626795] Updated weights for policy 0, policy_version 237462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:27,292][626795] Updated weights for policy 0, policy_version 237472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:28,976][24592] Fps is (10 sec: 42599.8, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 1945436160. Throughput: 0: 10566.9. Samples: 236335872. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:28,977][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:29,298][626795] Updated weights for policy 0, policy_version 237482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:31,325][626795] Updated weights for policy 0, policy_version 237492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:33,168][626795] Updated weights for policy 0, policy_version 237502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:33,975][24592] Fps is (10 sec: 41779.8, 60 sec: 42188.8, 300 sec: 42265.2). Total num frames: 1945640960. Throughput: 0: 10558.9. Samples: 236399076. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:33,976][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:35,197][626795] Updated weights for policy 0, policy_version 237512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:37,158][626795] Updated weights for policy 0, policy_version 237522 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:38,976][24592] Fps is (10 sec: 41779.2, 60 sec: 42188.7, 300 sec: 42265.2). Total num frames: 1945853952. Throughput: 0: 10541.4. Samples: 236462286. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:38,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:39,032][626795] Updated weights for policy 0, policy_version 237532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:40,899][626795] Updated weights for policy 0, policy_version 237542 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:42,935][626795] Updated weights for policy 0, policy_version 237552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:43,991][24592] Fps is (10 sec: 42531.7, 60 sec: 42177.8, 300 sec: 42262.9). Total num frames: 1946066944. Throughput: 0: 10536.6. Samples: 236493630. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:43,992][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:44,914][626795] Updated weights for policy 0, policy_version 237562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:46,817][626795] Updated weights for policy 0, policy_version 237572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:48,663][626795] Updated weights for policy 0, policy_version 237582 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:48,976][24592] Fps is (10 sec: 41778.8, 60 sec: 42052.1, 300 sec: 42237.4). Total num frames: 1946271744. Throughput: 0: 10567.6. Samples: 236557542. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:48,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:50,623][626795] Updated weights for policy 0, policy_version 237592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:52,487][626795] Updated weights for policy 0, policy_version 237602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:53,976][24592] Fps is (10 sec: 42664.2, 60 sec: 42188.6, 300 sec: 42265.1). Total num frames: 1946492928. Throughput: 0: 10574.8. Samples: 236620914. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:53,977][24592] Avg episode reward: [(0, '4.968')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:54,575][626795] Updated weights for policy 0, policy_version 237612 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:56,542][626795] Updated weights for policy 0, policy_version 237622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:27:58,405][626795] Updated weights for policy 0, policy_version 237632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:58,975][24592] Fps is (10 sec: 43418.3, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 1946705920. Throughput: 0: 10584.4. Samples: 236652282. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:27:58,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:00,305][626795] Updated weights for policy 0, policy_version 237642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:02,323][626795] Updated weights for policy 0, policy_version 237652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:03,975][24592] Fps is (10 sec: 42599.4, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 1946918912. Throughput: 0: 10555.8. Samples: 236715486. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:03,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:04,305][626795] Updated weights for policy 0, policy_version 237662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:06,121][626795] Updated weights for policy 0, policy_version 237672 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:08,084][626795] Updated weights for policy 0, policy_version 237682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:08,976][24592] Fps is (10 sec: 41776.5, 60 sec: 42325.1, 300 sec: 42237.3). Total num frames: 1947123712. Throughput: 0: 10556.3. Samples: 236779272. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:08,978][24592] Avg episode reward: [(0, '4.367')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:10,107][626795] Updated weights for policy 0, policy_version 237692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:12,056][626795] Updated weights for policy 0, policy_version 237702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:13,888][626795] Updated weights for policy 0, policy_version 237712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:13,978][24592] Fps is (10 sec: 41768.2, 60 sec: 42323.5, 300 sec: 42264.8). Total num frames: 1947336704. Throughput: 0: 10550.5. Samples: 236810670. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:13,980][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:15,822][626795] Updated weights for policy 0, policy_version 237722 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:17,749][626795] Updated weights for policy 0, policy_version 237732 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:18,976][24592] Fps is (10 sec: 42600.4, 60 sec: 42325.5, 300 sec: 42292.9). Total num frames: 1947549696. Throughput: 0: 10573.7. Samples: 236874894. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:18,978][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:19,735][626795] Updated weights for policy 0, policy_version 237742 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:21,657][626795] Updated weights for policy 0, policy_version 237752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:23,514][626795] Updated weights for policy 0, policy_version 237762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:23,976][24592] Fps is (10 sec: 42607.4, 60 sec: 42325.0, 300 sec: 42265.1). Total num frames: 1947762688. Throughput: 0: 10598.6. Samples: 236939226. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:23,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:25,493][626795] Updated weights for policy 0, policy_version 237772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:27,340][626795] Updated weights for policy 0, policy_version 237782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:28,976][24592] Fps is (10 sec: 42597.7, 60 sec: 42325.2, 300 sec: 42265.1). Total num frames: 1947975680. Throughput: 0: 10587.9. Samples: 236969922. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:28,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:29,323][626795] Updated weights for policy 0, policy_version 237792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:31,300][626795] Updated weights for policy 0, policy_version 237802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:33,095][626795] Updated weights for policy 0, policy_version 237812 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:33,975][24592] Fps is (10 sec: 41781.5, 60 sec: 42325.3, 300 sec: 42237.4). Total num frames: 1948180480. Throughput: 0: 10599.9. Samples: 237034536. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:33,977][24592] Avg episode reward: [(0, '5.088')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:35,111][626795] Updated weights for policy 0, policy_version 237822 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:37,200][626795] Updated weights for policy 0, policy_version 237832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:38,947][626795] Updated weights for policy 0, policy_version 237842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:38,975][24592] Fps is (10 sec: 42600.2, 60 sec: 42462.0, 300 sec: 42292.9). Total num frames: 1948401664. Throughput: 0: 10591.5. Samples: 237097530. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:38,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:40,938][626795] Updated weights for policy 0, policy_version 237852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:42,857][626795] Updated weights for policy 0, policy_version 237862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:43,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42336.1, 300 sec: 42237.3). Total num frames: 1948606464. Throughput: 0: 10599.5. Samples: 237129264. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:43,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:44,883][626795] Updated weights for policy 0, policy_version 237872 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:46,778][626795] Updated weights for policy 0, policy_version 237882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:48,676][626795] Updated weights for policy 0, policy_version 237892 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42462.0, 300 sec: 42265.2). Total num frames: 1948819456. Throughput: 0: 10604.7. Samples: 237192696. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:48,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:50,638][626795] Updated weights for policy 0, policy_version 237902 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:52,587][626795] Updated weights for policy 0, policy_version 237912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:53,975][24592] Fps is (10 sec: 42599.9, 60 sec: 42325.5, 300 sec: 42265.2). Total num frames: 1949032448. Throughput: 0: 10610.0. Samples: 237256716. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:53,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:54,450][626795] Updated weights for policy 0, policy_version 237922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:56,406][626795] Updated weights for policy 0, policy_version 237932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:28:58,378][626795] Updated weights for policy 0, policy_version 237942 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:58,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 1949245440. Throughput: 0: 10608.9. Samples: 237288042. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:28:58,976][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:00,242][626795] Updated weights for policy 0, policy_version 237952 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:02,275][626795] Updated weights for policy 0, policy_version 237962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:03,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 1949458432. Throughput: 0: 10590.4. Samples: 237351462. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:03,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000237971_1949458432.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:04,076][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000236733_1939316736.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:04,117][626795] Updated weights for policy 0, policy_version 237972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:06,116][626795] Updated weights for policy 0, policy_version 237982 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:08,070][626795] Updated weights for policy 0, policy_version 237992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42325.8, 300 sec: 42237.4). Total num frames: 1949663232. Throughput: 0: 10559.5. Samples: 237414396. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:08,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:10,018][626795] Updated weights for policy 0, policy_version 238002 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:11,959][626795] Updated weights for policy 0, policy_version 238012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:13,975][626795] Updated weights for policy 0, policy_version 238022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:13,976][24592] Fps is (10 sec: 40959.4, 60 sec: 42190.5, 300 sec: 42209.6). Total num frames: 1949868032. Throughput: 0: 10573.5. Samples: 237445728. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:13,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:15,823][626795] Updated weights for policy 0, policy_version 238032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:17,666][626795] Updated weights for policy 0, policy_version 238042 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:18,975][24592] Fps is (10 sec: 42597.9, 60 sec: 42325.4, 300 sec: 42237.4). Total num frames: 1950089216. Throughput: 0: 10571.8. Samples: 237510270. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:18,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:19,680][626795] Updated weights for policy 0, policy_version 238052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:21,584][626795] Updated weights for policy 0, policy_version 238062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:23,532][626795] Updated weights for policy 0, policy_version 238072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:23,975][24592] Fps is (10 sec: 43418.4, 60 sec: 42325.7, 300 sec: 42237.4). Total num frames: 1950302208. Throughput: 0: 10582.4. Samples: 237573738. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:23,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:25,435][626795] Updated weights for policy 0, policy_version 238082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:27,463][626795] Updated weights for policy 0, policy_version 238092 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:28,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42325.6, 300 sec: 42265.2). Total num frames: 1950515200. Throughput: 0: 10582.4. Samples: 237605466. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:28,976][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:29,278][626795] Updated weights for policy 0, policy_version 238102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:31,267][626795] Updated weights for policy 0, policy_version 238112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:33,203][626795] Updated weights for policy 0, policy_version 238122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42325.4, 300 sec: 42237.4). Total num frames: 1950720000. Throughput: 0: 10586.1. Samples: 237669072. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:33,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:35,107][626795] Updated weights for policy 0, policy_version 238132 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:37,062][626795] Updated weights for policy 0, policy_version 238142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:38,968][626795] Updated weights for policy 0, policy_version 238152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:38,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 1950941184. Throughput: 0: 10569.9. Samples: 237732360. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:38,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:40,914][626795] Updated weights for policy 0, policy_version 238162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:43,003][626795] Updated weights for policy 0, policy_version 238172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:43,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42325.6, 300 sec: 42265.2). Total num frames: 1951145984. Throughput: 0: 10565.3. Samples: 237763482. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:43,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:44,924][626795] Updated weights for policy 0, policy_version 238182 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:46,808][626795] Updated weights for policy 0, policy_version 238192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:48,682][626795] Updated weights for policy 0, policy_version 238202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:48,977][24592] Fps is (10 sec: 41774.4, 60 sec: 42324.5, 300 sec: 42237.2). Total num frames: 1951358976. Throughput: 0: 10570.9. Samples: 237827166. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:48,979][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:50,680][626795] Updated weights for policy 0, policy_version 238212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:52,547][626795] Updated weights for policy 0, policy_version 238222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:53,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 1951571968. Throughput: 0: 10576.9. Samples: 237890358. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:53,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:54,542][626795] Updated weights for policy 0, policy_version 238232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:56,595][626795] Updated weights for policy 0, policy_version 238242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:29:58,459][626795] Updated weights for policy 0, policy_version 238252 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:58,976][24592] Fps is (10 sec: 42601.3, 60 sec: 42325.0, 300 sec: 42265.1). Total num frames: 1951784960. Throughput: 0: 10580.1. Samples: 237921834. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:29:58,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:00,391][626795] Updated weights for policy 0, policy_version 238262 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:02,259][626795] Updated weights for policy 0, policy_version 238272 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42188.9, 300 sec: 42237.4). Total num frames: 1951989760. Throughput: 0: 10558.8. Samples: 237985416. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:03,976][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:04,356][626795] Updated weights for policy 0, policy_version 238282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:06,212][626795] Updated weights for policy 0, policy_version 238292 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:08,122][626795] Updated weights for policy 0, policy_version 238302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:08,975][24592] Fps is (10 sec: 41781.2, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 1952202752. Throughput: 0: 10551.3. Samples: 238048548. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:08,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:10,035][626795] Updated weights for policy 0, policy_version 238312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:10,718][626772] Signal inference workers to stop experience collection... (3200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:10,719][626772] Signal inference workers to resume experience collection... (3200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:10,736][626795] InferenceWorker_p0-w0: stopping experience collection (3200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:10,744][626795] InferenceWorker_p0-w0: resuming experience collection (3200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:12,037][626795] Updated weights for policy 0, policy_version 238322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:13,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42462.0, 300 sec: 42265.2). Total num frames: 1952415744. Throughput: 0: 10542.5. Samples: 238079880. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:13,976][626795] Updated weights for policy 0, policy_version 238332 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:13,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:15,937][626795] Updated weights for policy 0, policy_version 238342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:17,891][626795] Updated weights for policy 0, policy_version 238352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:18,976][24592] Fps is (10 sec: 41777.4, 60 sec: 42188.6, 300 sec: 42237.4). Total num frames: 1952620544. Throughput: 0: 10523.4. Samples: 238142628. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:18,977][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:19,927][626795] Updated weights for policy 0, policy_version 238362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:21,811][626795] Updated weights for policy 0, policy_version 238372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:23,777][626795] Updated weights for policy 0, policy_version 238382 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:23,977][24592] Fps is (10 sec: 41771.5, 60 sec: 42187.5, 300 sec: 42237.1). Total num frames: 1952833536. Throughput: 0: 10518.8. Samples: 238205724. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:23,979][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:25,747][626795] Updated weights for policy 0, policy_version 238392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:27,659][626795] Updated weights for policy 0, policy_version 238402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:28,975][24592] Fps is (10 sec: 41780.9, 60 sec: 42052.3, 300 sec: 42209.6). Total num frames: 1953038336. Throughput: 0: 10520.5. Samples: 238236906. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:28,976][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:29,582][626795] Updated weights for policy 0, policy_version 238412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:31,589][626795] Updated weights for policy 0, policy_version 238422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:33,460][626795] Updated weights for policy 0, policy_version 238432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:33,976][24592] Fps is (10 sec: 41783.6, 60 sec: 42188.2, 300 sec: 42209.5). Total num frames: 1953251328. Throughput: 0: 10517.3. Samples: 238300440. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:33,978][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:35,354][626795] Updated weights for policy 0, policy_version 238442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:37,315][626795] Updated weights for policy 0, policy_version 238452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:38,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 42237.4). Total num frames: 1953464320. Throughput: 0: 10532.9. Samples: 238364340. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:38,978][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:39,358][626795] Updated weights for policy 0, policy_version 238462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:41,152][626795] Updated weights for policy 0, policy_version 238472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:43,169][626795] Updated weights for policy 0, policy_version 238482 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:43,978][24592] Fps is (10 sec: 42592.0, 60 sec: 42187.2, 300 sec: 42237.1). Total num frames: 1953677312. Throughput: 0: 10538.6. Samples: 238396092. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:43,978][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:45,147][626795] Updated weights for policy 0, policy_version 238492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:47,080][626795] Updated weights for policy 0, policy_version 238502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42053.1, 300 sec: 42209.6). Total num frames: 1953882112. Throughput: 0: 10513.6. Samples: 238458528. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:48,978][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:49,044][626795] Updated weights for policy 0, policy_version 238512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:50,987][626795] Updated weights for policy 0, policy_version 238522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:53,005][626795] Updated weights for policy 0, policy_version 238532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:53,975][24592] Fps is (10 sec: 41788.4, 60 sec: 42052.2, 300 sec: 42237.5). Total num frames: 1954095104. Throughput: 0: 10519.6. Samples: 238521930. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:53,979][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:54,877][626795] Updated weights for policy 0, policy_version 238542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:56,759][626795] Updated weights for policy 0, policy_version 238552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:30:58,743][626795] Updated weights for policy 0, policy_version 238562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:58,978][24592] Fps is (10 sec: 42586.2, 60 sec: 42050.6, 300 sec: 42264.8). Total num frames: 1954308096. Throughput: 0: 10521.2. Samples: 238553364. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:30:58,980][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:00,712][626795] Updated weights for policy 0, policy_version 238572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:02,681][626795] Updated weights for policy 0, policy_version 238582 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:03,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42188.6, 300 sec: 42237.4). Total num frames: 1954521088. Throughput: 0: 10537.1. Samples: 238616796. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:03,976][24592] Avg episode reward: [(0, '4.970')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000238589_1954521088.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000237352_1944387584.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:04,600][626795] Updated weights for policy 0, policy_version 238592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:06,577][626795] Updated weights for policy 0, policy_version 238602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:08,373][626795] Updated weights for policy 0, policy_version 238612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:08,975][24592] Fps is (10 sec: 42610.4, 60 sec: 42188.8, 300 sec: 42265.2). Total num frames: 1954734080. Throughput: 0: 10552.0. Samples: 238680546. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:08,976][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:10,328][626795] Updated weights for policy 0, policy_version 238622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:12,338][626795] Updated weights for policy 0, policy_version 238632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:13,976][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.1, 300 sec: 42237.4). Total num frames: 1954938880. Throughput: 0: 10551.2. Samples: 238711710. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:13,976][24592] Avg episode reward: [(0, '5.111')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:14,321][626795] Updated weights for policy 0, policy_version 238642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:16,133][626795] Updated weights for policy 0, policy_version 238652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:18,096][626795] Updated weights for policy 0, policy_version 238662 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42189.1, 300 sec: 42237.4). Total num frames: 1955151872. Throughput: 0: 10569.4. Samples: 238776054. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:18,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:20,068][626795] Updated weights for policy 0, policy_version 238672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:21,981][626795] Updated weights for policy 0, policy_version 238682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:23,954][626795] Updated weights for policy 0, policy_version 238692 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:23,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42190.0, 300 sec: 42265.2). Total num frames: 1955364864. Throughput: 0: 10545.3. Samples: 238838880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:23,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:25,849][626795] Updated weights for policy 0, policy_version 238702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:27,765][626795] Updated weights for policy 0, policy_version 238712 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:28,976][24592] Fps is (10 sec: 42596.7, 60 sec: 42325.1, 300 sec: 42265.1). Total num frames: 1955577856. Throughput: 0: 10549.0. Samples: 238870776. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:28,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:29,670][626795] Updated weights for policy 0, policy_version 238722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:31,667][626795] Updated weights for policy 0, policy_version 238732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:33,578][626795] Updated weights for policy 0, policy_version 238742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:33,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42325.9, 300 sec: 42265.2). Total num frames: 1955790848. Throughput: 0: 10575.2. Samples: 238934412. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:33,978][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:35,528][626795] Updated weights for policy 0, policy_version 238752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:37,503][626795] Updated weights for policy 0, policy_version 238762 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:38,975][24592] Fps is (10 sec: 41780.7, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 1955995648. Throughput: 0: 10578.0. Samples: 238997940. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:38,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:39,430][626795] Updated weights for policy 0, policy_version 238772 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:41,385][626795] Updated weights for policy 0, policy_version 238782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:43,273][626795] Updated weights for policy 0, policy_version 238792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42190.4, 300 sec: 42237.4). Total num frames: 1956208640. Throughput: 0: 10577.6. Samples: 239029326. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:43,977][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:45,158][626795] Updated weights for policy 0, policy_version 238802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:47,215][626795] Updated weights for policy 0, policy_version 238812 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:48,976][24592] Fps is (10 sec: 42597.6, 60 sec: 42325.2, 300 sec: 42237.4). Total num frames: 1956421632. Throughput: 0: 10570.8. Samples: 239092482. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:48,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:49,165][626795] Updated weights for policy 0, policy_version 238822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:51,117][626795] Updated weights for policy 0, policy_version 238832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:52,946][626795] Updated weights for policy 0, policy_version 238842 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 1956626432. Throughput: 0: 10561.3. Samples: 239155806. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:53,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:55,003][626795] Updated weights for policy 0, policy_version 238852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:56,893][626795] Updated weights for policy 0, policy_version 238862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:31:58,909][626795] Updated weights for policy 0, policy_version 238872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:58,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42190.5, 300 sec: 42237.3). Total num frames: 1956839424. Throughput: 0: 10560.2. Samples: 239186922. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:31:58,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:00,777][626795] Updated weights for policy 0, policy_version 238882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:02,631][626795] Updated weights for policy 0, policy_version 238892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:03,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42189.0, 300 sec: 42265.2). Total num frames: 1957052416. Throughput: 0: 10549.1. Samples: 239250762. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:03,977][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:04,612][626795] Updated weights for policy 0, policy_version 238902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:06,528][626795] Updated weights for policy 0, policy_version 238912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:08,491][626795] Updated weights for policy 0, policy_version 238922 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:08,975][24592] Fps is (10 sec: 42600.0, 60 sec: 42188.8, 300 sec: 42265.2). Total num frames: 1957265408. Throughput: 0: 10578.0. Samples: 239314890. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:08,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:10,455][626795] Updated weights for policy 0, policy_version 238932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:12,435][626795] Updated weights for policy 0, policy_version 238942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:13,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42188.9, 300 sec: 42237.5). Total num frames: 1957470208. Throughput: 0: 10563.3. Samples: 239346120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:13,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:14,302][626795] Updated weights for policy 0, policy_version 238952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:16,295][626795] Updated weights for policy 0, policy_version 238962 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:18,207][626795] Updated weights for policy 0, policy_version 238972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:18,976][24592] Fps is (10 sec: 42596.1, 60 sec: 42324.9, 300 sec: 42265.1). Total num frames: 1957691392. Throughput: 0: 10557.1. Samples: 239409486. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:18,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:20,142][626795] Updated weights for policy 0, policy_version 238982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:22,062][626795] Updated weights for policy 0, policy_version 238992 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:23,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 1957896192. Throughput: 0: 10561.1. Samples: 239473188. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:23,976][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:24,083][626795] Updated weights for policy 0, policy_version 239002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:25,920][626795] Updated weights for policy 0, policy_version 239012 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:27,935][626795] Updated weights for policy 0, policy_version 239022 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:28,975][24592] Fps is (10 sec: 41781.6, 60 sec: 42189.1, 300 sec: 42265.2). Total num frames: 1958109184. Throughput: 0: 10541.2. Samples: 239503680. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:28,978][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:29,845][626795] Updated weights for policy 0, policy_version 239032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:31,791][626795] Updated weights for policy 0, policy_version 239042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:33,738][626795] Updated weights for policy 0, policy_version 239052 (0.0039)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:33,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42188.8, 300 sec: 42265.2). Total num frames: 1958322176. Throughput: 0: 10553.4. Samples: 239567382. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:33,977][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:35,724][626795] Updated weights for policy 0, policy_version 239062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:37,589][626795] Updated weights for policy 0, policy_version 239072 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:38,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42188.8, 300 sec: 42239.6). Total num frames: 1958526976. Throughput: 0: 10548.0. Samples: 239630466. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:38,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:39,568][626795] Updated weights for policy 0, policy_version 239082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:41,496][626795] Updated weights for policy 0, policy_version 239092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:43,547][626795] Updated weights for policy 0, policy_version 239102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:43,978][24592] Fps is (10 sec: 41768.5, 60 sec: 42187.0, 300 sec: 42264.8). Total num frames: 1958739968. Throughput: 0: 10568.4. Samples: 239662524. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:43,979][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:45,378][626795] Updated weights for policy 0, policy_version 239112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:47,361][626795] Updated weights for policy 0, policy_version 239122 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:48,976][24592] Fps is (10 sec: 42597.1, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 1958952960. Throughput: 0: 10558.3. Samples: 239725890. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:48,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:49,278][626795] Updated weights for policy 0, policy_version 239132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:51,199][626795] Updated weights for policy 0, policy_version 239142 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:53,166][626795] Updated weights for policy 0, policy_version 239152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:53,975][24592] Fps is (10 sec: 42609.3, 60 sec: 42325.3, 300 sec: 42237.4). Total num frames: 1959165952. Throughput: 0: 10539.2. Samples: 239789154. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:53,977][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:55,117][626795] Updated weights for policy 0, policy_version 239162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:57,107][626795] Updated weights for policy 0, policy_version 239172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:32:58,907][626795] Updated weights for policy 0, policy_version 239182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:58,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42325.6, 300 sec: 42237.4). Total num frames: 1959378944. Throughput: 0: 10549.2. Samples: 239820834. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:32:58,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:00,910][626795] Updated weights for policy 0, policy_version 239192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:02,851][626795] Updated weights for policy 0, policy_version 239202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42188.8, 300 sec: 42237.5). Total num frames: 1959583744. Throughput: 0: 10552.3. Samples: 239884332. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:03,979][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000239207_1959583744.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:04,066][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000237971_1949458432.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:04,909][626795] Updated weights for policy 0, policy_version 239212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:06,727][626795] Updated weights for policy 0, policy_version 239222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:08,746][626795] Updated weights for policy 0, policy_version 239232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42188.8, 300 sec: 42237.8). Total num frames: 1959796736. Throughput: 0: 10533.1. Samples: 239947176. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:08,976][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:10,654][626795] Updated weights for policy 0, policy_version 239242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:12,593][626795] Updated weights for policy 0, policy_version 239252 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:13,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42237.4). Total num frames: 1960009728. Throughput: 0: 10554.1. Samples: 239978616. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:13,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:14,516][626795] Updated weights for policy 0, policy_version 239262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:16,437][626795] Updated weights for policy 0, policy_version 239272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:18,402][626795] Updated weights for policy 0, policy_version 239282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:18,976][24592] Fps is (10 sec: 41777.2, 60 sec: 42052.3, 300 sec: 42209.6). Total num frames: 1960214528. Throughput: 0: 10559.5. Samples: 240042564. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:18,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:20,452][626795] Updated weights for policy 0, policy_version 239292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:22,237][626795] Updated weights for policy 0, policy_version 239302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:23,975][24592] Fps is (10 sec: 41778.8, 60 sec: 42188.8, 300 sec: 42209.7). Total num frames: 1960427520. Throughput: 0: 10564.2. Samples: 240105858. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:23,977][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:24,204][626795] Updated weights for policy 0, policy_version 239312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:26,129][626795] Updated weights for policy 0, policy_version 239322 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:28,021][626795] Updated weights for policy 0, policy_version 239332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:28,975][24592] Fps is (10 sec: 42600.5, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 1960640512. Throughput: 0: 10555.8. Samples: 240137508. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:28,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:30,049][626795] Updated weights for policy 0, policy_version 239342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:32,068][626795] Updated weights for policy 0, policy_version 239352 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:33,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42052.3, 300 sec: 42181.9). Total num frames: 1960845312. Throughput: 0: 10541.1. Samples: 240200238. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:33,976][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:34,014][626795] Updated weights for policy 0, policy_version 239362 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:35,849][626795] Updated weights for policy 0, policy_version 239372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:37,858][626795] Updated weights for policy 0, policy_version 239382 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42188.8, 300 sec: 42209.7). Total num frames: 1961058304. Throughput: 0: 10546.3. Samples: 240263736. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:38,976][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:39,864][626795] Updated weights for policy 0, policy_version 239392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:41,709][626795] Updated weights for policy 0, policy_version 239402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:43,694][626795] Updated weights for policy 0, policy_version 239412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:43,976][24592] Fps is (10 sec: 43416.9, 60 sec: 42327.1, 300 sec: 42237.4). Total num frames: 1961279488. Throughput: 0: 10540.9. Samples: 240295176. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:43,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:45,643][626795] Updated weights for policy 0, policy_version 239422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:47,665][626795] Updated weights for policy 0, policy_version 239432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:48,976][24592] Fps is (10 sec: 42597.0, 60 sec: 42188.8, 300 sec: 42209.6). Total num frames: 1961484288. Throughput: 0: 10526.7. Samples: 240358038. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:48,977][24592] Avg episode reward: [(0, '5.120')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:49,614][626795] Updated weights for policy 0, policy_version 239442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:51,503][626795] Updated weights for policy 0, policy_version 239452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:53,371][626795] Updated weights for policy 0, policy_version 239462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:53,975][24592] Fps is (10 sec: 40960.7, 60 sec: 42052.3, 300 sec: 42181.9). Total num frames: 1961689088. Throughput: 0: 10541.7. Samples: 240421554. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:53,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:55,359][626795] Updated weights for policy 0, policy_version 239472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:57,316][626795] Updated weights for policy 0, policy_version 239482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:58,975][24592] Fps is (10 sec: 41780.3, 60 sec: 42052.3, 300 sec: 42181.9). Total num frames: 1961902080. Throughput: 0: 10537.6. Samples: 240452808. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:33:58,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:33:59,242][626795] Updated weights for policy 0, policy_version 239492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:01,205][626795] Updated weights for policy 0, policy_version 239502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:03,266][626795] Updated weights for policy 0, policy_version 239512 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:03,976][24592] Fps is (10 sec: 41776.1, 60 sec: 42051.8, 300 sec: 42181.8). Total num frames: 1962106880. Throughput: 0: 10513.0. Samples: 240515652. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:03,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:05,077][626795] Updated weights for policy 0, policy_version 239522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:07,069][626795] Updated weights for policy 0, policy_version 239532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42052.3, 300 sec: 42209.7). Total num frames: 1962319872. Throughput: 0: 10522.3. Samples: 240579360. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:08,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:08,985][626795] Updated weights for policy 0, policy_version 239542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:10,860][626795] Updated weights for policy 0, policy_version 239552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:12,936][626795] Updated weights for policy 0, policy_version 239562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:13,977][24592] Fps is (10 sec: 42596.5, 60 sec: 42051.4, 300 sec: 42181.7). Total num frames: 1962532864. Throughput: 0: 10511.3. Samples: 240610530. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:13,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:14,902][626795] Updated weights for policy 0, policy_version 239572 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:16,716][626795] Updated weights for policy 0, policy_version 239582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:18,659][626795] Updated weights for policy 0, policy_version 239592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42189.2, 300 sec: 42181.9). Total num frames: 1962745856. Throughput: 0: 10531.1. Samples: 240674136. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:18,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:20,639][626795] Updated weights for policy 0, policy_version 239602 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:22,649][626795] Updated weights for policy 0, policy_version 239612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:23,975][24592] Fps is (10 sec: 42603.4, 60 sec: 42188.9, 300 sec: 42181.9). Total num frames: 1962958848. Throughput: 0: 10525.7. Samples: 240737394. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:23,977][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:24,505][626795] Updated weights for policy 0, policy_version 239622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:26,479][626795] Updated weights for policy 0, policy_version 239632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:28,384][626795] Updated weights for policy 0, policy_version 239642 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42188.8, 300 sec: 42209.6). Total num frames: 1963171840. Throughput: 0: 10529.9. Samples: 240769020. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:28,977][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:30,370][626795] Updated weights for policy 0, policy_version 239652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:32,311][626795] Updated weights for policy 0, policy_version 239662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42188.8, 300 sec: 42154.1). Total num frames: 1963376640. Throughput: 0: 10532.1. Samples: 240831978. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:33,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:34,298][626795] Updated weights for policy 0, policy_version 239672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:36,256][626795] Updated weights for policy 0, policy_version 239682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:38,186][626795] Updated weights for policy 0, policy_version 239692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 42052.3, 300 sec: 42154.1). Total num frames: 1963581440. Throughput: 0: 10510.1. Samples: 240894510. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:38,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:40,200][626795] Updated weights for policy 0, policy_version 239702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:42,082][626795] Updated weights for policy 0, policy_version 239712 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:43,890][626795] Updated weights for policy 0, policy_version 239722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:43,976][24592] Fps is (10 sec: 42596.5, 60 sec: 42052.1, 300 sec: 42182.0). Total num frames: 1963802624. Throughput: 0: 10528.3. Samples: 240926586. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:43,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:45,889][626795] Updated weights for policy 0, policy_version 239732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:47,878][626795] Updated weights for policy 0, policy_version 239742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.5, 300 sec: 42154.1). Total num frames: 1964007424. Throughput: 0: 10544.7. Samples: 240990156. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:48,976][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:49,831][626795] Updated weights for policy 0, policy_version 239752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:51,746][626795] Updated weights for policy 0, policy_version 239762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:53,615][626795] Updated weights for policy 0, policy_version 239772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:53,977][24592] Fps is (10 sec: 41773.5, 60 sec: 42187.5, 300 sec: 42153.9). Total num frames: 1964220416. Throughput: 0: 10542.1. Samples: 241053774. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:53,979][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:55,748][626795] Updated weights for policy 0, policy_version 239782 (0.0044)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:57,599][626795] Updated weights for policy 0, policy_version 239792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:58,975][24592] Fps is (10 sec: 42597.8, 60 sec: 42188.7, 300 sec: 42181.8). Total num frames: 1964433408. Throughput: 0: 10527.0. Samples: 241084236. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:34:58,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:34:59,572][626795] Updated weights for policy 0, policy_version 239802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:01,499][626795] Updated weights for policy 0, policy_version 239812 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:03,545][626795] Updated weights for policy 0, policy_version 239822 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:03,976][24592] Fps is (10 sec: 41785.6, 60 sec: 42189.1, 300 sec: 42154.0). Total num frames: 1964638208. Throughput: 0: 10513.9. Samples: 241147266. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:03,979][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:03,998][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000239825_1964646400.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:04,072][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000238589_1954521088.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:05,532][626795] Updated weights for policy 0, policy_version 239832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:07,453][626795] Updated weights for policy 0, policy_version 239842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:08,975][24592] Fps is (10 sec: 40960.4, 60 sec: 42052.2, 300 sec: 42126.3). Total num frames: 1964843008. Throughput: 0: 10503.3. Samples: 241210044. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:08,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:09,337][626795] Updated weights for policy 0, policy_version 239852 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:11,319][626795] Updated weights for policy 0, policy_version 239862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:13,341][626795] Updated weights for policy 0, policy_version 239872 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:13,976][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.9, 300 sec: 42154.1). Total num frames: 1965056000. Throughput: 0: 10489.3. Samples: 241241040. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:13,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:15,301][626795] Updated weights for policy 0, policy_version 239882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:17,034][626795] Updated weights for policy 0, policy_version 239892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:18,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42052.2, 300 sec: 42154.3). Total num frames: 1965268992. Throughput: 0: 10498.8. Samples: 241304424. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:18,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:19,100][626795] Updated weights for policy 0, policy_version 239902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:21,089][626795] Updated weights for policy 0, policy_version 239912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:22,944][626795] Updated weights for policy 0, policy_version 239922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:23,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41915.7, 300 sec: 42154.1). Total num frames: 1965473792. Throughput: 0: 10522.7. Samples: 241368030. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:23,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:24,935][626795] Updated weights for policy 0, policy_version 239932 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:26,905][626795] Updated weights for policy 0, policy_version 239942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:27,091][626772] Signal inference workers to stop experience collection... (3250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:27,092][626772] Signal inference workers to resume experience collection... (3250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:27,107][626795] InferenceWorker_p0-w0: stopping experience collection (3250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:27,108][626795] InferenceWorker_p0-w0: resuming experience collection (3250 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:28,670][626795] Updated weights for policy 0, policy_version 239952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.7, 300 sec: 42154.2). Total num frames: 1965686784. Throughput: 0: 10511.8. Samples: 241399614. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:28,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:30,840][626795] Updated weights for policy 0, policy_version 239962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:32,589][626795] Updated weights for policy 0, policy_version 239972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:33,976][24592] Fps is (10 sec: 42596.5, 60 sec: 42051.9, 300 sec: 42154.0). Total num frames: 1965899776. Throughput: 0: 10502.0. Samples: 241462752. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:33,980][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:34,654][626795] Updated weights for policy 0, policy_version 239982 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:36,624][626795] Updated weights for policy 0, policy_version 239992 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:38,516][626795] Updated weights for policy 0, policy_version 240002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42188.8, 300 sec: 42154.4). Total num frames: 1966112768. Throughput: 0: 10488.8. Samples: 241525752. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:38,977][24592] Avg episode reward: [(0, '4.922')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:40,438][626795] Updated weights for policy 0, policy_version 240012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:42,387][626795] Updated weights for policy 0, policy_version 240022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:43,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41916.0, 300 sec: 42154.1). Total num frames: 1966317568. Throughput: 0: 10514.0. Samples: 241557366. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:43,976][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:44,359][626795] Updated weights for policy 0, policy_version 240032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:46,321][626795] Updated weights for policy 0, policy_version 240042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:48,272][626795] Updated weights for policy 0, policy_version 240052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 42052.2, 300 sec: 42154.1). Total num frames: 1966530560. Throughput: 0: 10529.8. Samples: 241621104. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:48,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:50,273][626795] Updated weights for policy 0, policy_version 240062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:52,161][626795] Updated weights for policy 0, policy_version 240072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:53,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42053.4, 300 sec: 42154.5). Total num frames: 1966743552. Throughput: 0: 10526.2. Samples: 241683726. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:53,977][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:54,154][626795] Updated weights for policy 0, policy_version 240082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:56,077][626795] Updated weights for policy 0, policy_version 240092 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:58,067][626795] Updated weights for policy 0, policy_version 240102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.3, 300 sec: 42154.1). Total num frames: 1966956544. Throughput: 0: 10541.4. Samples: 241715400. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:35:58,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:35:59,933][626795] Updated weights for policy 0, policy_version 240112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:01,956][626795] Updated weights for policy 0, policy_version 240122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:03,976][24592] Fps is (10 sec: 40958.5, 60 sec: 41915.6, 300 sec: 42098.5). Total num frames: 1967153152. Throughput: 0: 10505.5. Samples: 241777176. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:03,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:04,196][626795] Updated weights for policy 0, policy_version 240132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:06,372][626795] Updated weights for policy 0, policy_version 240142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:08,438][626795] Updated weights for policy 0, policy_version 240152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:08,975][24592] Fps is (10 sec: 38502.3, 60 sec: 41642.7, 300 sec: 42043.0). Total num frames: 1967341568. Throughput: 0: 10345.9. Samples: 241833594. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:08,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:10,560][626795] Updated weights for policy 0, policy_version 240162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:12,528][626795] Updated weights for policy 0, policy_version 240172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:13,976][24592] Fps is (10 sec: 39320.9, 60 sec: 41505.9, 300 sec: 42015.1). Total num frames: 1967546368. Throughput: 0: 10332.6. Samples: 241864590. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:13,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:14,406][626795] Updated weights for policy 0, policy_version 240182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:16,434][626795] Updated weights for policy 0, policy_version 240192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:18,290][626795] Updated weights for policy 0, policy_version 240202 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.1, 300 sec: 42015.2). Total num frames: 1967759360. Throughput: 0: 10331.7. Samples: 241927674. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:18,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:20,269][626795] Updated weights for policy 0, policy_version 240212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:22,187][626795] Updated weights for policy 0, policy_version 240222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:23,975][24592] Fps is (10 sec: 42601.0, 60 sec: 41642.6, 300 sec: 42015.3). Total num frames: 1967972352. Throughput: 0: 10331.3. Samples: 241990662. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:23,976][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:24,204][626795] Updated weights for policy 0, policy_version 240232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:26,053][626795] Updated weights for policy 0, policy_version 240242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:28,022][626795] Updated weights for policy 0, policy_version 240252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:28,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41642.7, 300 sec: 42015.2). Total num frames: 1968185344. Throughput: 0: 10330.1. Samples: 242022222. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:28,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:29,993][626795] Updated weights for policy 0, policy_version 240262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:31,921][626795] Updated weights for policy 0, policy_version 240272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:33,843][626795] Updated weights for policy 0, policy_version 240282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:33,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41643.0, 300 sec: 42043.0). Total num frames: 1968398336. Throughput: 0: 10326.5. Samples: 242085798. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:33,978][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:35,939][626795] Updated weights for policy 0, policy_version 240292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:37,743][626795] Updated weights for policy 0, policy_version 240302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:38,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41506.1, 300 sec: 42015.2). Total num frames: 1968603136. Throughput: 0: 10336.9. Samples: 242148888. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:38,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:39,697][626795] Updated weights for policy 0, policy_version 240312 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:41,646][626795] Updated weights for policy 0, policy_version 240322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:43,579][626795] Updated weights for policy 0, policy_version 240332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:43,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.6, 300 sec: 42015.3). Total num frames: 1968816128. Throughput: 0: 10338.7. Samples: 242180640. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:43,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:45,529][626795] Updated weights for policy 0, policy_version 240342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:47,401][626795] Updated weights for policy 0, policy_version 240352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:48,977][24592] Fps is (10 sec: 42592.8, 60 sec: 41641.8, 300 sec: 42042.8). Total num frames: 1969029120. Throughput: 0: 10367.9. Samples: 242243742. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:48,978][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:49,403][626795] Updated weights for policy 0, policy_version 240362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:51,337][626795] Updated weights for policy 0, policy_version 240372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:53,320][626795] Updated weights for policy 0, policy_version 240382 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:53,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41506.2, 300 sec: 42015.3). Total num frames: 1969233920. Throughput: 0: 10522.4. Samples: 242307102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:53,976][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:55,270][626795] Updated weights for policy 0, policy_version 240392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:57,275][626795] Updated weights for policy 0, policy_version 240402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:58,976][24592] Fps is (10 sec: 41781.3, 60 sec: 41505.5, 300 sec: 42015.1). Total num frames: 1969446912. Throughput: 0: 10512.7. Samples: 242337666. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:36:58,977][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:36:59,200][626795] Updated weights for policy 0, policy_version 240412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:01,120][626795] Updated weights for policy 0, policy_version 240422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:03,039][626795] Updated weights for policy 0, policy_version 240432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:03,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41642.8, 300 sec: 41987.4). Total num frames: 1969651712. Throughput: 0: 10542.7. Samples: 242402100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:03,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000240437_1969659904.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:04,072][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000239207_1959583744.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:04,899][626795] Updated weights for policy 0, policy_version 240442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:06,925][626795] Updated weights for policy 0, policy_version 240452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:08,873][626795] Updated weights for policy 0, policy_version 240462 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:08,976][24592] Fps is (10 sec: 41781.7, 60 sec: 42052.1, 300 sec: 42015.2). Total num frames: 1969864704. Throughput: 0: 10536.5. Samples: 242464806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:08,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:10,771][626795] Updated weights for policy 0, policy_version 240472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:12,678][626795] Updated weights for policy 0, policy_version 240482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:13,976][24592] Fps is (10 sec: 42597.6, 60 sec: 42188.9, 300 sec: 41987.5). Total num frames: 1970077696. Throughput: 0: 10533.1. Samples: 242496216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:13,977][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:14,744][626795] Updated weights for policy 0, policy_version 240492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:16,713][626795] Updated weights for policy 0, policy_version 240502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:18,664][626795] Updated weights for policy 0, policy_version 240512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:18,975][24592] Fps is (10 sec: 41780.1, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 1970282496. Throughput: 0: 10510.8. Samples: 242558784. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:18,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:20,606][626795] Updated weights for policy 0, policy_version 240522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:22,530][626795] Updated weights for policy 0, policy_version 240532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:23,976][24592] Fps is (10 sec: 41780.6, 60 sec: 42052.2, 300 sec: 41987.4). Total num frames: 1970495488. Throughput: 0: 10518.1. Samples: 242622204. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:23,978][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:24,496][626795] Updated weights for policy 0, policy_version 240542 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:26,355][626795] Updated weights for policy 0, policy_version 240552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:28,326][626795] Updated weights for policy 0, policy_version 240562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:28,977][24592] Fps is (10 sec: 41773.1, 60 sec: 41914.7, 300 sec: 41959.5). Total num frames: 1970700288. Throughput: 0: 10503.0. Samples: 242653290. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:28,978][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:30,324][626795] Updated weights for policy 0, policy_version 240572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:32,349][626795] Updated weights for policy 0, policy_version 240582 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41915.6, 300 sec: 41987.5). Total num frames: 1970913280. Throughput: 0: 10498.6. Samples: 242716164. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:33,980][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:34,358][626795] Updated weights for policy 0, policy_version 240592 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:36,299][626795] Updated weights for policy 0, policy_version 240602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:38,121][626795] Updated weights for policy 0, policy_version 240612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:38,975][24592] Fps is (10 sec: 42604.8, 60 sec: 42052.3, 300 sec: 41987.8). Total num frames: 1971126272. Throughput: 0: 10490.0. Samples: 242779152. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:38,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:40,088][626795] Updated weights for policy 0, policy_version 240622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:42,039][626795] Updated weights for policy 0, policy_version 240632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:43,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 1971331072. Throughput: 0: 10508.5. Samples: 242810538. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:43,979][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:44,041][626795] Updated weights for policy 0, policy_version 240642 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:45,974][626795] Updated weights for policy 0, policy_version 240652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:47,944][626795] Updated weights for policy 0, policy_version 240662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41916.7, 300 sec: 41959.7). Total num frames: 1971544064. Throughput: 0: 10479.4. Samples: 242873670. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:48,976][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:49,920][626795] Updated weights for policy 0, policy_version 240672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:51,860][626795] Updated weights for policy 0, policy_version 240682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:53,669][626795] Updated weights for policy 0, policy_version 240692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:53,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 1971748864. Throughput: 0: 10487.0. Samples: 242936718. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:53,978][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:55,710][626795] Updated weights for policy 0, policy_version 240702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:57,705][626795] Updated weights for policy 0, policy_version 240712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:58,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41916.2, 300 sec: 41959.7). Total num frames: 1971961856. Throughput: 0: 10483.0. Samples: 242967948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:37:58,977][24592] Avg episode reward: [(0, '5.005')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:37:59,661][626795] Updated weights for policy 0, policy_version 240722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:01,579][626795] Updated weights for policy 0, policy_version 240732 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:03,514][626795] Updated weights for policy 0, policy_version 240742 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:03,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 1972174848. Throughput: 0: 10493.0. Samples: 243030972. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:03,976][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:05,429][626795] Updated weights for policy 0, policy_version 240752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:07,439][626795] Updated weights for policy 0, policy_version 240762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:08,975][24592] Fps is (10 sec: 42599.5, 60 sec: 42052.5, 300 sec: 41959.7). Total num frames: 1972387840. Throughput: 0: 10496.3. Samples: 243094536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:08,978][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:09,355][626795] Updated weights for policy 0, policy_version 240772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:11,362][626795] Updated weights for policy 0, policy_version 240782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:13,218][626795] Updated weights for policy 0, policy_version 240792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:13,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42052.6, 300 sec: 41987.5). Total num frames: 1972600832. Throughput: 0: 10496.2. Samples: 243125604. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:13,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:15,225][626795] Updated weights for policy 0, policy_version 240802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:17,137][626795] Updated weights for policy 0, policy_version 240812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 1972805632. Throughput: 0: 10503.1. Samples: 243188802. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:18,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:19,090][626795] Updated weights for policy 0, policy_version 240822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:21,014][626795] Updated weights for policy 0, policy_version 240832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:23,012][626795] Updated weights for policy 0, policy_version 240842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 1973018624. Throughput: 0: 10509.7. Samples: 243252090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:23,977][24592] Avg episode reward: [(0, '4.972')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:24,998][626795] Updated weights for policy 0, policy_version 240852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:26,917][626795] Updated weights for policy 0, policy_version 240862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:28,858][626795] Updated weights for policy 0, policy_version 240872 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:28,975][24592] Fps is (10 sec: 41778.7, 60 sec: 42053.3, 300 sec: 41959.7). Total num frames: 1973223424. Throughput: 0: 10509.5. Samples: 243283464. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:28,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:30,776][626795] Updated weights for policy 0, policy_version 240882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:32,684][626795] Updated weights for policy 0, policy_version 240892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 1973436416. Throughput: 0: 10511.5. Samples: 243346686. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:33,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:34,667][626795] Updated weights for policy 0, policy_version 240902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:36,619][626795] Updated weights for policy 0, policy_version 240912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:38,593][626795] Updated weights for policy 0, policy_version 240922 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:38,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 1973641216. Throughput: 0: 10511.6. Samples: 243409740. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:38,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:40,477][626795] Updated weights for policy 0, policy_version 240932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:42,456][626795] Updated weights for policy 0, policy_version 240942 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:43,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42188.8, 300 sec: 41959.7). Total num frames: 1973862400. Throughput: 0: 10513.4. Samples: 243441048. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:43,977][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:44,444][626795] Updated weights for policy 0, policy_version 240952 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:46,353][626795] Updated weights for policy 0, policy_version 240962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:48,347][626795] Updated weights for policy 0, policy_version 240972 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.2, 300 sec: 41959.7). Total num frames: 1974067200. Throughput: 0: 10517.0. Samples: 243504234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:48,980][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:50,296][626795] Updated weights for policy 0, policy_version 240982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:52,266][626795] Updated weights for policy 0, policy_version 240992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:53,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42188.7, 300 sec: 41959.7). Total num frames: 1974280192. Throughput: 0: 10502.5. Samples: 243567150. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:53,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:54,264][626795] Updated weights for policy 0, policy_version 241002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:56,219][626795] Updated weights for policy 0, policy_version 241012 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:38:58,119][626795] Updated weights for policy 0, policy_version 241022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:58,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42052.4, 300 sec: 41959.8). Total num frames: 1974484992. Throughput: 0: 10496.1. Samples: 243597930. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:38:58,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:00,106][626795] Updated weights for policy 0, policy_version 241032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:01,935][626795] Updated weights for policy 0, policy_version 241042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:03,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41915.8, 300 sec: 41931.9). Total num frames: 1974689792. Throughput: 0: 10486.3. Samples: 243660690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:03,978][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:04,002][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000241052_1974697984.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:04,012][626795] Updated weights for policy 0, policy_version 241052 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:04,074][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000239825_1964646400.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:06,052][626795] Updated weights for policy 0, policy_version 241062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:07,849][626795] Updated weights for policy 0, policy_version 241072 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41932.1). Total num frames: 1974902784. Throughput: 0: 10497.4. Samples: 243724470. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:08,977][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:09,827][626795] Updated weights for policy 0, policy_version 241082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:11,681][626795] Updated weights for policy 0, policy_version 241092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:13,696][626795] Updated weights for policy 0, policy_version 241102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:13,975][24592] Fps is (10 sec: 42599.4, 60 sec: 41915.8, 300 sec: 41931.9). Total num frames: 1975115776. Throughput: 0: 10500.3. Samples: 243755976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:13,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:15,580][626795] Updated weights for policy 0, policy_version 241112 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:17,616][626795] Updated weights for policy 0, policy_version 241122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:18,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41915.6, 300 sec: 41904.1). Total num frames: 1975320576. Throughput: 0: 10495.3. Samples: 243818976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:18,977][24592] Avg episode reward: [(0, '5.030')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:19,539][626795] Updated weights for policy 0, policy_version 241132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:21,523][626795] Updated weights for policy 0, policy_version 241142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:23,390][626795] Updated weights for policy 0, policy_version 241152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:23,990][24592] Fps is (10 sec: 41719.9, 60 sec: 41905.9, 300 sec: 41902.1). Total num frames: 1975533568. Throughput: 0: 10492.3. Samples: 243882042. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:23,991][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:25,461][626795] Updated weights for policy 0, policy_version 241162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:27,389][626795] Updated weights for policy 0, policy_version 241172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:28,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41915.8, 300 sec: 41904.2). Total num frames: 1975738368. Throughput: 0: 10493.2. Samples: 243913242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:28,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:29,370][626795] Updated weights for policy 0, policy_version 241182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:31,373][626795] Updated weights for policy 0, policy_version 241192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:33,252][626795] Updated weights for policy 0, policy_version 241202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:33,976][24592] Fps is (10 sec: 41835.5, 60 sec: 41915.2, 300 sec: 41931.8). Total num frames: 1975951360. Throughput: 0: 10492.1. Samples: 243976386. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:33,978][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:35,234][626795] Updated weights for policy 0, policy_version 241212 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:37,150][626795] Updated weights for policy 0, policy_version 241222 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:38,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42052.2, 300 sec: 41904.2). Total num frames: 1976164352. Throughput: 0: 10488.9. Samples: 244039152. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:38,977][24592] Avg episode reward: [(0, '4.941')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:39,179][626795] Updated weights for policy 0, policy_version 241232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:41,082][626795] Updated weights for policy 0, policy_version 241242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:43,049][626795] Updated weights for policy 0, policy_version 241252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:43,976][24592] Fps is (10 sec: 41780.6, 60 sec: 41778.9, 300 sec: 41904.1). Total num frames: 1976369152. Throughput: 0: 10502.4. Samples: 244070544. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:43,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:44,920][626795] Updated weights for policy 0, policy_version 241262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:46,889][626795] Updated weights for policy 0, policy_version 241272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:48,816][626795] Updated weights for policy 0, policy_version 241282 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.8, 300 sec: 41904.4). Total num frames: 1976582144. Throughput: 0: 10513.8. Samples: 244133808. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:48,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:50,879][626795] Updated weights for policy 0, policy_version 241292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:52,807][626795] Updated weights for policy 0, policy_version 241302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:53,975][24592] Fps is (10 sec: 42600.2, 60 sec: 41915.8, 300 sec: 41904.2). Total num frames: 1976795136. Throughput: 0: 10488.3. Samples: 244196442. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:53,976][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:54,738][626795] Updated weights for policy 0, policy_version 241312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:56,752][626795] Updated weights for policy 0, policy_version 241322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:39:58,716][626795] Updated weights for policy 0, policy_version 241332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:58,982][24592] Fps is (10 sec: 41750.3, 60 sec: 41910.9, 300 sec: 41903.2). Total num frames: 1976999936. Throughput: 0: 10471.5. Samples: 244227264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:39:58,990][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:00,631][626795] Updated weights for policy 0, policy_version 241342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:02,494][626795] Updated weights for policy 0, policy_version 241352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.4, 300 sec: 41931.9). Total num frames: 1977212928. Throughput: 0: 10484.7. Samples: 244290786. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:03,978][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:04,548][626795] Updated weights for policy 0, policy_version 241362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:06,367][626795] Updated weights for policy 0, policy_version 241372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:08,318][626795] Updated weights for policy 0, policy_version 241382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:08,975][24592] Fps is (10 sec: 42627.9, 60 sec: 42052.2, 300 sec: 41932.0). Total num frames: 1977425920. Throughput: 0: 10487.7. Samples: 244353840. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:08,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:10,350][626795] Updated weights for policy 0, policy_version 241392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:12,176][626795] Updated weights for policy 0, policy_version 241402 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 1977630720. Throughput: 0: 10506.9. Samples: 244386054. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:13,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:14,176][626795] Updated weights for policy 0, policy_version 241412 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:16,184][626795] Updated weights for policy 0, policy_version 241422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:17,940][626795] Updated weights for policy 0, policy_version 241432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42052.4, 300 sec: 41931.9). Total num frames: 1977843712. Throughput: 0: 10517.9. Samples: 244449684. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:18,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:19,914][626795] Updated weights for policy 0, policy_version 241442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:21,886][626795] Updated weights for policy 0, policy_version 241452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:23,833][626795] Updated weights for policy 0, policy_version 241462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:23,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42062.2, 300 sec: 41931.9). Total num frames: 1978056704. Throughput: 0: 10532.7. Samples: 244513122. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:23,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:25,731][626795] Updated weights for policy 0, policy_version 241472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:27,739][626795] Updated weights for policy 0, policy_version 241482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42188.8, 300 sec: 41932.0). Total num frames: 1978269696. Throughput: 0: 10527.4. Samples: 244544274. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:28,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:29,678][626795] Updated weights for policy 0, policy_version 241492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:31,652][626795] Updated weights for policy 0, policy_version 241502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:33,477][626795] Updated weights for policy 0, policy_version 241512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42189.3, 300 sec: 41931.9). Total num frames: 1978482688. Throughput: 0: 10537.9. Samples: 244608012. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:33,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:35,547][626795] Updated weights for policy 0, policy_version 241522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:37,445][626795] Updated weights for policy 0, policy_version 241532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 41931.9). Total num frames: 1978687488. Throughput: 0: 10542.3. Samples: 244670844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:38,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:39,471][626795] Updated weights for policy 0, policy_version 241542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:41,449][626795] Updated weights for policy 0, policy_version 241552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:43,227][626795] Updated weights for policy 0, policy_version 241562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:43,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42189.1, 300 sec: 41931.9). Total num frames: 1978900480. Throughput: 0: 10558.8. Samples: 244702338. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:43,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:45,205][626795] Updated weights for policy 0, policy_version 241572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:47,165][626795] Updated weights for policy 0, policy_version 241582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:48,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42188.8, 300 sec: 41931.9). Total num frames: 1979113472. Throughput: 0: 10558.0. Samples: 244765896. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:48,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:49,029][626795] Updated weights for policy 0, policy_version 241592 (0.0038)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:51,049][626795] Updated weights for policy 0, policy_version 241602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:52,969][626795] Updated weights for policy 0, policy_version 241612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:53,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42188.6, 300 sec: 41931.9). Total num frames: 1979326464. Throughput: 0: 10555.4. Samples: 244828836. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:53,978][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:55,068][626795] Updated weights for policy 0, policy_version 241622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:56,914][626795] Updated weights for policy 0, policy_version 241632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:40:58,891][626795] Updated weights for policy 0, policy_version 241642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42193.7, 300 sec: 41959.8). Total num frames: 1979531264. Throughput: 0: 10522.5. Samples: 244859568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:40:58,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:00,835][626795] Updated weights for policy 0, policy_version 241652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:02,732][626795] Updated weights for policy 0, policy_version 241662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:03,976][24592] Fps is (10 sec: 40960.4, 60 sec: 42052.1, 300 sec: 42015.2). Total num frames: 1979736064. Throughput: 0: 10510.5. Samples: 244922658. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:03,978][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:04,002][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000241668_1979744256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:04,097][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000240437_1969659904.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:04,913][626795] Updated weights for policy 0, policy_version 241672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:06,766][626795] Updated weights for policy 0, policy_version 241682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:08,723][626795] Updated weights for policy 0, policy_version 241692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42052.3, 300 sec: 42043.1). Total num frames: 1979949056. Throughput: 0: 10487.3. Samples: 244985052. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:08,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:10,700][626795] Updated weights for policy 0, policy_version 241702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:12,702][626795] Updated weights for policy 0, policy_version 241712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:13,975][24592] Fps is (10 sec: 42599.5, 60 sec: 42188.8, 300 sec: 42043.0). Total num frames: 1980162048. Throughput: 0: 10494.7. Samples: 245016534. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:13,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:14,580][626795] Updated weights for policy 0, policy_version 241722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:16,577][626795] Updated weights for policy 0, policy_version 241732 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:18,437][626795] Updated weights for policy 0, policy_version 241742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.2, 300 sec: 42015.2). Total num frames: 1980366848. Throughput: 0: 10486.0. Samples: 245079882. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:18,977][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:20,454][626795] Updated weights for policy 0, policy_version 241752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:22,345][626795] Updated weights for policy 0, policy_version 241762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 1980579840. Throughput: 0: 10489.3. Samples: 245142864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:23,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:24,338][626795] Updated weights for policy 0, policy_version 241772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:26,318][626795] Updated weights for policy 0, policy_version 241782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:28,192][626795] Updated weights for policy 0, policy_version 241792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.2, 300 sec: 42015.2). Total num frames: 1980792832. Throughput: 0: 10488.6. Samples: 245174328. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:28,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:30,171][626795] Updated weights for policy 0, policy_version 241802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:32,114][626795] Updated weights for policy 0, policy_version 241812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:33,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41915.7, 300 sec: 42015.2). Total num frames: 1980997632. Throughput: 0: 10471.7. Samples: 245237124. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:33,976][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:34,069][626795] Updated weights for policy 0, policy_version 241822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:36,078][626795] Updated weights for policy 0, policy_version 241832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:38,075][626795] Updated weights for policy 0, policy_version 241842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:38,976][24592] Fps is (10 sec: 40957.3, 60 sec: 41915.2, 300 sec: 41987.4). Total num frames: 1981202432. Throughput: 0: 10461.8. Samples: 245299620. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:38,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:40,008][626795] Updated weights for policy 0, policy_version 241852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:41,857][626795] Updated weights for policy 0, policy_version 241862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:43,810][626795] Updated weights for policy 0, policy_version 241872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:43,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41915.5, 300 sec: 41987.6). Total num frames: 1981415424. Throughput: 0: 10488.7. Samples: 245331564. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:43,978][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:45,747][626795] Updated weights for policy 0, policy_version 241882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:47,644][626795] Updated weights for policy 0, policy_version 241892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:48,991][24592] Fps is (10 sec: 42534.9, 60 sec: 41904.9, 300 sec: 42013.0). Total num frames: 1981628416. Throughput: 0: 10498.4. Samples: 245395248. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:48,992][24592] Avg episode reward: [(0, '4.911')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:49,615][626795] Updated weights for policy 0, policy_version 241902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:51,534][626795] Updated weights for policy 0, policy_version 241912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:53,475][626795] Updated weights for policy 0, policy_version 241922 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:53,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41779.5, 300 sec: 41987.6). Total num frames: 1981833216. Throughput: 0: 10527.6. Samples: 245458794. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:53,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:55,557][626795] Updated weights for policy 0, policy_version 241932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:57,455][626795] Updated weights for policy 0, policy_version 241942 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:58,975][24592] Fps is (10 sec: 42665.1, 60 sec: 42052.3, 300 sec: 42043.1). Total num frames: 1982054400. Throughput: 0: 10512.7. Samples: 245489604. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:41:58,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:41:59,307][626795] Updated weights for policy 0, policy_version 241952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:01,284][626795] Updated weights for policy 0, policy_version 241962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:03,197][626795] Updated weights for policy 0, policy_version 241972 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:03,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42052.5, 300 sec: 42015.3). Total num frames: 1982259200. Throughput: 0: 10522.6. Samples: 245553396. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:03,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:05,132][626795] Updated weights for policy 0, policy_version 241982 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:07,096][626795] Updated weights for policy 0, policy_version 241992 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 42015.3). Total num frames: 1982472192. Throughput: 0: 10523.2. Samples: 245616408. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:08,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:09,106][626795] Updated weights for policy 0, policy_version 242002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:10,878][626795] Updated weights for policy 0, policy_version 242012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:12,903][626795] Updated weights for policy 0, policy_version 242022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:13,975][24592] Fps is (10 sec: 42597.7, 60 sec: 42052.2, 300 sec: 42043.0). Total num frames: 1982685184. Throughput: 0: 10536.1. Samples: 245648454. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:13,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:14,854][626795] Updated weights for policy 0, policy_version 242032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:16,871][626795] Updated weights for policy 0, policy_version 242042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:18,691][626795] Updated weights for policy 0, policy_version 242052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:18,976][24592] Fps is (10 sec: 41778.2, 60 sec: 42052.1, 300 sec: 42015.2). Total num frames: 1982889984. Throughput: 0: 10554.2. Samples: 245712066. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:18,993][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:20,620][626795] Updated weights for policy 0, policy_version 242062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:22,539][626795] Updated weights for policy 0, policy_version 242072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:23,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42188.8, 300 sec: 42071.0). Total num frames: 1983111168. Throughput: 0: 10580.7. Samples: 245775744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:23,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:24,447][626795] Updated weights for policy 0, policy_version 242082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:26,411][626795] Updated weights for policy 0, policy_version 242092 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:28,353][626795] Updated weights for policy 0, policy_version 242102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:28,975][24592] Fps is (10 sec: 43418.6, 60 sec: 42188.8, 300 sec: 42070.8). Total num frames: 1983324160. Throughput: 0: 10577.3. Samples: 245807538. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:28,978][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:30,318][626795] Updated weights for policy 0, policy_version 242112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:32,270][626795] Updated weights for policy 0, policy_version 242122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:33,975][24592] Fps is (10 sec: 41778.9, 60 sec: 42188.8, 300 sec: 42043.0). Total num frames: 1983528960. Throughput: 0: 10569.4. Samples: 245870706. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:33,980][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:34,205][626795] Updated weights for policy 0, policy_version 242132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:36,186][626795] Updated weights for policy 0, policy_version 242142 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:38,103][626795] Updated weights for policy 0, policy_version 242152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42325.8, 300 sec: 42070.8). Total num frames: 1983741952. Throughput: 0: 10557.2. Samples: 245933868. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:38,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:40,051][626795] Updated weights for policy 0, policy_version 242162 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:42,025][626795] Updated weights for policy 0, policy_version 242172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:43,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42189.0, 300 sec: 42043.0). Total num frames: 1983946752. Throughput: 0: 10569.0. Samples: 245965212. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:43,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:44,099][626795] Updated weights for policy 0, policy_version 242182 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:45,806][626795] Updated weights for policy 0, policy_version 242192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:47,878][626795] Updated weights for policy 0, policy_version 242202 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:48,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42336.4, 300 sec: 42098.6). Total num frames: 1984167936. Throughput: 0: 10547.8. Samples: 246028050. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:48,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:49,814][626795] Updated weights for policy 0, policy_version 242212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:51,825][626795] Updated weights for policy 0, policy_version 242222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:53,709][626795] Updated weights for policy 0, policy_version 242232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:53,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42325.0, 300 sec: 42070.7). Total num frames: 1984372736. Throughput: 0: 10553.1. Samples: 246091302. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:53,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:55,686][626795] Updated weights for policy 0, policy_version 242242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:57,610][626795] Updated weights for policy 0, policy_version 242252 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,657][626772] Signal inference workers to stop experience collection... (3300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,662][626772] Signal inference workers to resume experience collection... (3300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,667][626795] InferenceWorker_p0-w0: stopping experience collection (3300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,674][626795] InferenceWorker_p0-w0: resuming experience collection (3300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,976][24592] Fps is (10 sec: 40959.3, 60 sec: 42052.2, 300 sec: 42043.0). Total num frames: 1984577536. Throughput: 0: 10537.6. Samples: 246122646. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:42:58,978][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:42:59,574][626795] Updated weights for policy 0, policy_version 242262 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:01,534][626795] Updated weights for policy 0, policy_version 242272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:03,428][626795] Updated weights for policy 0, policy_version 242282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:03,976][24592] Fps is (10 sec: 41779.1, 60 sec: 42188.4, 300 sec: 42042.9). Total num frames: 1984790528. Throughput: 0: 10538.6. Samples: 246186306. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:03,977][24592] Avg episode reward: [(0, '4.931')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:04,023][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000242285_1984798720.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:04,141][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000241052_1974697984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:05,514][626795] Updated weights for policy 0, policy_version 242292 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:07,417][626795] Updated weights for policy 0, policy_version 242302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:08,976][24592] Fps is (10 sec: 42596.5, 60 sec: 42188.4, 300 sec: 42042.9). Total num frames: 1985003520. Throughput: 0: 10510.4. Samples: 246248718. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:08,978][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:09,345][626795] Updated weights for policy 0, policy_version 242312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:11,318][626795] Updated weights for policy 0, policy_version 242322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:13,323][626795] Updated weights for policy 0, policy_version 242332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:13,975][24592] Fps is (10 sec: 41781.2, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 1985208320. Throughput: 0: 10490.1. Samples: 246279594. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:13,976][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:15,200][626795] Updated weights for policy 0, policy_version 242342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:17,129][626795] Updated weights for policy 0, policy_version 242352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:18,976][24592] Fps is (10 sec: 41781.8, 60 sec: 42189.0, 300 sec: 42043.0). Total num frames: 1985421312. Throughput: 0: 10499.7. Samples: 246343194. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:18,978][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:19,125][626795] Updated weights for policy 0, policy_version 242362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:21,045][626795] Updated weights for policy 0, policy_version 242372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:22,956][626795] Updated weights for policy 0, policy_version 242382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:23,976][24592] Fps is (10 sec: 42597.5, 60 sec: 42052.1, 300 sec: 42070.8). Total num frames: 1985634304. Throughput: 0: 10511.2. Samples: 246406872. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:23,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:24,963][626795] Updated weights for policy 0, policy_version 242392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:26,826][626795] Updated weights for policy 0, policy_version 242402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:28,783][626795] Updated weights for policy 0, policy_version 242412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.3, 300 sec: 42070.8). Total num frames: 1985847296. Throughput: 0: 10505.8. Samples: 246437970. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:28,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:30,651][626795] Updated weights for policy 0, policy_version 242422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:32,546][626795] Updated weights for policy 0, policy_version 242432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:33,976][24592] Fps is (10 sec: 42594.7, 60 sec: 42188.1, 300 sec: 42098.4). Total num frames: 1986060288. Throughput: 0: 10541.7. Samples: 246502440. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:33,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:34,498][626795] Updated weights for policy 0, policy_version 242442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:36,440][626795] Updated weights for policy 0, policy_version 242452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:38,387][626795] Updated weights for policy 0, policy_version 242462 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 1986265088. Throughput: 0: 10541.8. Samples: 246565680. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:38,976][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:40,342][626795] Updated weights for policy 0, policy_version 242472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:42,308][626795] Updated weights for policy 0, policy_version 242482 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:43,975][24592] Fps is (10 sec: 41783.4, 60 sec: 42188.8, 300 sec: 42070.8). Total num frames: 1986478080. Throughput: 0: 10530.5. Samples: 246596520. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:43,978][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:44,269][626795] Updated weights for policy 0, policy_version 242492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:46,273][626795] Updated weights for policy 0, policy_version 242502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:48,128][626795] Updated weights for policy 0, policy_version 242512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42052.2, 300 sec: 42070.8). Total num frames: 1986691072. Throughput: 0: 10523.6. Samples: 246659862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:48,976][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:50,129][626795] Updated weights for policy 0, policy_version 242522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:51,955][626795] Updated weights for policy 0, policy_version 242532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:53,891][626795] Updated weights for policy 0, policy_version 242542 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:53,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42189.2, 300 sec: 42098.6). Total num frames: 1986904064. Throughput: 0: 10566.4. Samples: 246724200. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:53,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:55,960][626795] Updated weights for policy 0, policy_version 242552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:57,811][626795] Updated weights for policy 0, policy_version 242562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42325.4, 300 sec: 42126.4). Total num frames: 1987117056. Throughput: 0: 10576.7. Samples: 246755544. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:43:58,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:43:59,765][626795] Updated weights for policy 0, policy_version 242572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:01,699][626795] Updated weights for policy 0, policy_version 242582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:03,477][626795] Updated weights for policy 0, policy_version 242592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42189.2, 300 sec: 42098.6). Total num frames: 1987321856. Throughput: 0: 10580.7. Samples: 246819324. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:03,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:05,449][626795] Updated weights for policy 0, policy_version 242602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:07,447][626795] Updated weights for policy 0, policy_version 242612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:08,975][24592] Fps is (10 sec: 41778.7, 60 sec: 42189.1, 300 sec: 42098.5). Total num frames: 1987534848. Throughput: 0: 10577.4. Samples: 246882852. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:08,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:09,419][626795] Updated weights for policy 0, policy_version 242622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:11,426][626795] Updated weights for policy 0, policy_version 242632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:13,389][626795] Updated weights for policy 0, policy_version 242642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:13,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42325.3, 300 sec: 42126.3). Total num frames: 1987747840. Throughput: 0: 10581.6. Samples: 246914142. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:13,977][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:15,353][626795] Updated weights for policy 0, policy_version 242652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:17,239][626795] Updated weights for policy 0, policy_version 242662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:18,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42325.0, 300 sec: 42128.3). Total num frames: 1987960832. Throughput: 0: 10553.9. Samples: 246977358. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:18,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:18,984][626795] Updated weights for policy 0, policy_version 242672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:21,049][626795] Updated weights for policy 0, policy_version 242682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:23,050][626795] Updated weights for policy 0, policy_version 242692 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42325.5, 300 sec: 42154.1). Total num frames: 1988173824. Throughput: 0: 10552.0. Samples: 247040520. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:23,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:25,047][626795] Updated weights for policy 0, policy_version 242702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:26,824][626795] Updated weights for policy 0, policy_version 242712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:28,883][626795] Updated weights for policy 0, policy_version 242722 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:28,975][24592] Fps is (10 sec: 42599.9, 60 sec: 42325.3, 300 sec: 42154.2). Total num frames: 1988386816. Throughput: 0: 10571.7. Samples: 247072248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:28,977][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:30,739][626795] Updated weights for policy 0, policy_version 242732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:32,637][626795] Updated weights for policy 0, policy_version 242742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:33,976][24592] Fps is (10 sec: 41777.3, 60 sec: 42189.3, 300 sec: 42126.3). Total num frames: 1988591616. Throughput: 0: 10585.1. Samples: 247136196. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:33,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:34,639][626795] Updated weights for policy 0, policy_version 242752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:36,579][626795] Updated weights for policy 0, policy_version 242762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:38,450][626795] Updated weights for policy 0, policy_version 242772 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42325.3, 300 sec: 42154.1). Total num frames: 1988804608. Throughput: 0: 10572.8. Samples: 247199976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:38,978][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:40,368][626795] Updated weights for policy 0, policy_version 242782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:42,296][626795] Updated weights for policy 0, policy_version 242792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:43,975][24592] Fps is (10 sec: 42599.9, 60 sec: 42325.3, 300 sec: 42154.1). Total num frames: 1989017600. Throughput: 0: 10569.8. Samples: 247231188. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:43,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:44,296][626795] Updated weights for policy 0, policy_version 242802 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:46,330][626795] Updated weights for policy 0, policy_version 242812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:48,054][626795] Updated weights for policy 0, policy_version 242822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42188.8, 300 sec: 42126.3). Total num frames: 1989222400. Throughput: 0: 10558.6. Samples: 247294464. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:48,979][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:50,169][626795] Updated weights for policy 0, policy_version 242832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:52,137][626795] Updated weights for policy 0, policy_version 242842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:53,975][24592] Fps is (10 sec: 41779.7, 60 sec: 42188.8, 300 sec: 42155.1). Total num frames: 1989435392. Throughput: 0: 10552.0. Samples: 247357692. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:53,976][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:54,034][626795] Updated weights for policy 0, policy_version 242852 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:55,998][626795] Updated weights for policy 0, policy_version 242862 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:58,067][626795] Updated weights for policy 0, policy_version 242872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:58,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42188.8, 300 sec: 42154.1). Total num frames: 1989648384. Throughput: 0: 10545.3. Samples: 247388682. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:44:58,976][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:44:59,780][626795] Updated weights for policy 0, policy_version 242882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:01,780][626795] Updated weights for policy 0, policy_version 242892 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:03,769][626795] Updated weights for policy 0, policy_version 242902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:03,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42325.3, 300 sec: 42154.1). Total num frames: 1989861376. Throughput: 0: 10547.5. Samples: 247451994. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:03,976][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000242903_1989861376.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:04,058][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000241668_1979744256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:05,624][626795] Updated weights for policy 0, policy_version 242912 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:07,655][626795] Updated weights for policy 0, policy_version 242922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:08,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42188.7, 300 sec: 42154.1). Total num frames: 1990066176. Throughput: 0: 10553.4. Samples: 247515426. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:08,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:09,627][626795] Updated weights for policy 0, policy_version 242932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:11,513][626795] Updated weights for policy 0, policy_version 242942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:13,509][626795] Updated weights for policy 0, policy_version 242952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:13,976][24592] Fps is (10 sec: 41778.8, 60 sec: 42188.7, 300 sec: 42154.1). Total num frames: 1990279168. Throughput: 0: 10551.8. Samples: 247547082. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:13,978][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:15,529][626795] Updated weights for policy 0, policy_version 242962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:17,407][626795] Updated weights for policy 0, policy_version 242972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:18,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42189.1, 300 sec: 42154.1). Total num frames: 1990492160. Throughput: 0: 10523.2. Samples: 247609734. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:18,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:19,546][626795] Updated weights for policy 0, policy_version 242982 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:21,347][626795] Updated weights for policy 0, policy_version 242992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:23,255][626795] Updated weights for policy 0, policy_version 243002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:23,975][24592] Fps is (10 sec: 41779.8, 60 sec: 42052.2, 300 sec: 42126.3). Total num frames: 1990696960. Throughput: 0: 10512.3. Samples: 247673028. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:23,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:25,221][626795] Updated weights for policy 0, policy_version 243012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:27,206][626795] Updated weights for policy 0, policy_version 243022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 42126.3). Total num frames: 1990909952. Throughput: 0: 10510.3. Samples: 247704150. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:28,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:29,202][626795] Updated weights for policy 0, policy_version 243032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:31,148][626795] Updated weights for policy 0, policy_version 243042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:32,991][626795] Updated weights for policy 0, policy_version 243052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:33,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42052.5, 300 sec: 42126.3). Total num frames: 1991114752. Throughput: 0: 10502.9. Samples: 247767096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:33,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:34,999][626795] Updated weights for policy 0, policy_version 243062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:37,061][626795] Updated weights for policy 0, policy_version 243072 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:38,890][626795] Updated weights for policy 0, policy_version 243082 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:38,975][24592] Fps is (10 sec: 41778.8, 60 sec: 42052.2, 300 sec: 42126.3). Total num frames: 1991327744. Throughput: 0: 10499.2. Samples: 247830156. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:38,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:40,900][626795] Updated weights for policy 0, policy_version 243092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:42,781][626795] Updated weights for policy 0, policy_version 243102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:43,977][24592] Fps is (10 sec: 42590.5, 60 sec: 42050.9, 300 sec: 42126.0). Total num frames: 1991540736. Throughput: 0: 10511.0. Samples: 247861698. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:43,982][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:44,708][626795] Updated weights for policy 0, policy_version 243112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:46,677][626795] Updated weights for policy 0, policy_version 243122 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:48,715][626795] Updated weights for policy 0, policy_version 243132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:48,976][24592] Fps is (10 sec: 41777.3, 60 sec: 42051.9, 300 sec: 42098.5). Total num frames: 1991745536. Throughput: 0: 10504.1. Samples: 247924686. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:48,981][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:50,746][626795] Updated weights for policy 0, policy_version 243142 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:52,744][626795] Updated weights for policy 0, policy_version 243152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:53,975][24592] Fps is (10 sec: 41787.5, 60 sec: 42052.2, 300 sec: 42126.3). Total num frames: 1991958528. Throughput: 0: 10467.9. Samples: 247986480. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:53,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:54,592][626795] Updated weights for policy 0, policy_version 243162 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:56,653][626795] Updated weights for policy 0, policy_version 243172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:45:58,462][626795] Updated weights for policy 0, policy_version 243182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:58,975][24592] Fps is (10 sec: 41781.4, 60 sec: 41915.7, 300 sec: 42126.3). Total num frames: 1992163328. Throughput: 0: 10460.2. Samples: 248017788. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:45:58,976][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:00,487][626795] Updated weights for policy 0, policy_version 243192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:02,438][626795] Updated weights for policy 0, policy_version 243202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.8, 300 sec: 42126.3). Total num frames: 1992376320. Throughput: 0: 10468.9. Samples: 248080836. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:03,976][24592] Avg episode reward: [(0, '5.005')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:04,418][626795] Updated weights for policy 0, policy_version 243212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:06,249][626795] Updated weights for policy 0, policy_version 243222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:08,239][626795] Updated weights for policy 0, policy_version 243232 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:08,976][24592] Fps is (10 sec: 41774.8, 60 sec: 41915.1, 300 sec: 42098.4). Total num frames: 1992581120. Throughput: 0: 10464.8. Samples: 248143956. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:08,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:10,198][626795] Updated weights for policy 0, policy_version 243242 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:12,225][626795] Updated weights for policy 0, policy_version 243252 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.9, 300 sec: 42126.3). Total num frames: 1992794112. Throughput: 0: 10458.3. Samples: 248174772. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:13,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:14,180][626795] Updated weights for policy 0, policy_version 243262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:16,103][626795] Updated weights for policy 0, policy_version 243272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:17,969][626795] Updated weights for policy 0, policy_version 243282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:18,975][24592] Fps is (10 sec: 41783.4, 60 sec: 41779.2, 300 sec: 42098.5). Total num frames: 1992998912. Throughput: 0: 10473.2. Samples: 248238390. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:18,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:20,077][626795] Updated weights for policy 0, policy_version 243292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:21,945][626795] Updated weights for policy 0, policy_version 243302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:23,859][626795] Updated weights for policy 0, policy_version 243312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:23,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41915.7, 300 sec: 42098.5). Total num frames: 1993211904. Throughput: 0: 10468.7. Samples: 248301246. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:23,978][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:25,875][626795] Updated weights for policy 0, policy_version 243322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:27,823][626795] Updated weights for policy 0, policy_version 243332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:28,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.2, 300 sec: 42098.6). Total num frames: 1993416704. Throughput: 0: 10452.2. Samples: 248332026. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:28,977][24592] Avg episode reward: [(0, '5.027')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:29,781][626795] Updated weights for policy 0, policy_version 243342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:31,714][626795] Updated weights for policy 0, policy_version 243352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:33,662][626795] Updated weights for policy 0, policy_version 243362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:33,977][24592] Fps is (10 sec: 41774.2, 60 sec: 41915.0, 300 sec: 42126.2). Total num frames: 1993629696. Throughput: 0: 10455.7. Samples: 248395200. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:33,979][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:35,659][626795] Updated weights for policy 0, policy_version 243372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:37,597][626795] Updated weights for policy 0, policy_version 243382 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:38,976][24592] Fps is (10 sec: 42597.1, 60 sec: 41915.6, 300 sec: 42126.3). Total num frames: 1993842688. Throughput: 0: 10496.3. Samples: 248458818. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:38,979][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:39,547][626795] Updated weights for policy 0, policy_version 243392 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:41,580][626795] Updated weights for policy 0, policy_version 243402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:43,343][626795] Updated weights for policy 0, policy_version 243412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:43,975][24592] Fps is (10 sec: 42603.9, 60 sec: 41917.2, 300 sec: 42128.6). Total num frames: 1994055680. Throughput: 0: 10492.4. Samples: 248489946. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:43,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:45,351][626795] Updated weights for policy 0, policy_version 243422 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:47,375][626795] Updated weights for policy 0, policy_version 243432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:48,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.9, 300 sec: 42126.3). Total num frames: 1994260480. Throughput: 0: 10489.7. Samples: 248552874. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:48,976][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:49,253][626795] Updated weights for policy 0, policy_version 243442 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:51,210][626795] Updated weights for policy 0, policy_version 243452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:53,218][626795] Updated weights for policy 0, policy_version 243462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:53,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41779.2, 300 sec: 42070.8). Total num frames: 1994465280. Throughput: 0: 10470.9. Samples: 248615136. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:53,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:55,210][626795] Updated weights for policy 0, policy_version 243472 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:57,151][626795] Updated weights for policy 0, policy_version 243482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:58,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41915.5, 300 sec: 42098.5). Total num frames: 1994678272. Throughput: 0: 10463.4. Samples: 248645628. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:46:58,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:46:59,224][626795] Updated weights for policy 0, policy_version 243492 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:01,289][626795] Updated weights for policy 0, policy_version 243502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:03,269][626795] Updated weights for policy 0, policy_version 243512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:03,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 42043.0). Total num frames: 1994874880. Throughput: 0: 10423.1. Samples: 248707428. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:03,978][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000243515_1994874880.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:04,142][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000242285_1984798720.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:05,213][626795] Updated weights for policy 0, policy_version 243522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:07,221][626795] Updated weights for policy 0, policy_version 243532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:08,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41779.5, 300 sec: 42042.9). Total num frames: 1995087872. Throughput: 0: 10410.5. Samples: 248769726. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:08,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:09,130][626795] Updated weights for policy 0, policy_version 243542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:11,219][626795] Updated weights for policy 0, policy_version 243552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:13,115][626795] Updated weights for policy 0, policy_version 243562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:13,976][24592] Fps is (10 sec: 42596.3, 60 sec: 41778.8, 300 sec: 42070.7). Total num frames: 1995300864. Throughput: 0: 10413.5. Samples: 248800638. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:13,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:15,026][626795] Updated weights for policy 0, policy_version 243572 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:16,726][626772] Signal inference workers to stop experience collection... (3350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:16,727][626772] Signal inference workers to resume experience collection... (3350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:16,736][626795] InferenceWorker_p0-w0: stopping experience collection (3350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:16,741][626795] InferenceWorker_p0-w0: resuming experience collection (3350 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:16,960][626795] Updated weights for policy 0, policy_version 243582 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:18,963][626795] Updated weights for policy 0, policy_version 243592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:18,975][24592] Fps is (10 sec: 41782.0, 60 sec: 41779.3, 300 sec: 42015.2). Total num frames: 1995505664. Throughput: 0: 10398.0. Samples: 248863098. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:18,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:20,959][626795] Updated weights for policy 0, policy_version 243602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:22,777][626795] Updated weights for policy 0, policy_version 243612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:23,976][24592] Fps is (10 sec: 40961.6, 60 sec: 41642.6, 300 sec: 41987.5). Total num frames: 1995710464. Throughput: 0: 10390.7. Samples: 248926398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:23,978][24592] Avg episode reward: [(0, '4.313')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:24,871][626795] Updated weights for policy 0, policy_version 243622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:26,800][626795] Updated weights for policy 0, policy_version 243632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:28,745][626795] Updated weights for policy 0, policy_version 243642 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:28,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41779.2, 300 sec: 42015.2). Total num frames: 1995923456. Throughput: 0: 10392.9. Samples: 248957628. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:28,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:30,740][626795] Updated weights for policy 0, policy_version 243652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:32,675][626795] Updated weights for policy 0, policy_version 243662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:33,976][24592] Fps is (10 sec: 41779.1, 60 sec: 41643.4, 300 sec: 41987.5). Total num frames: 1996128256. Throughput: 0: 10379.1. Samples: 249019932. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:33,976][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:34,665][626795] Updated weights for policy 0, policy_version 243672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:36,620][626795] Updated weights for policy 0, policy_version 243682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:38,522][626795] Updated weights for policy 0, policy_version 243692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:38,977][24592] Fps is (10 sec: 40954.3, 60 sec: 41505.4, 300 sec: 41987.3). Total num frames: 1996333056. Throughput: 0: 10391.1. Samples: 249082752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:38,978][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:40,648][626795] Updated weights for policy 0, policy_version 243702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:42,524][626795] Updated weights for policy 0, policy_version 243712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:43,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41505.7, 300 sec: 41959.6). Total num frames: 1996546048. Throughput: 0: 10397.0. Samples: 249113496. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:43,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:44,435][626795] Updated weights for policy 0, policy_version 243722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:46,469][626795] Updated weights for policy 0, policy_version 243732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:48,450][626795] Updated weights for policy 0, policy_version 243742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:48,981][24592] Fps is (10 sec: 41761.3, 60 sec: 41502.4, 300 sec: 41959.0). Total num frames: 1996750848. Throughput: 0: 10420.9. Samples: 249176430. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:48,982][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:50,361][626795] Updated weights for policy 0, policy_version 243752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:52,292][626795] Updated weights for policy 0, policy_version 243762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:53,975][24592] Fps is (10 sec: 41781.7, 60 sec: 41642.7, 300 sec: 41987.5). Total num frames: 1996963840. Throughput: 0: 10437.5. Samples: 249239406. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:53,976][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:54,364][626795] Updated weights for policy 0, policy_version 243772 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:56,298][626795] Updated weights for policy 0, policy_version 243782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:47:58,237][626795] Updated weights for policy 0, policy_version 243792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:58,976][24592] Fps is (10 sec: 41802.3, 60 sec: 41506.3, 300 sec: 41959.7). Total num frames: 1997168640. Throughput: 0: 10429.5. Samples: 249269964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:47:58,976][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:00,245][626795] Updated weights for policy 0, policy_version 243802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:02,165][626795] Updated weights for policy 0, policy_version 243812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:03,976][24592] Fps is (10 sec: 41775.1, 60 sec: 41778.5, 300 sec: 41959.6). Total num frames: 1997381632. Throughput: 0: 10422.0. Samples: 249332100. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:03,978][24592] Avg episode reward: [(0, '4.342')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:04,201][626795] Updated weights for policy 0, policy_version 243822 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:06,138][626795] Updated weights for policy 0, policy_version 243832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:08,119][626795] Updated weights for policy 0, policy_version 243842 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:08,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41643.1, 300 sec: 41959.7). Total num frames: 1997586432. Throughput: 0: 10408.6. Samples: 249394782. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:08,976][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:10,092][626795] Updated weights for policy 0, policy_version 243852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:12,029][626795] Updated weights for policy 0, policy_version 243862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:13,856][626795] Updated weights for policy 0, policy_version 243872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:13,975][24592] Fps is (10 sec: 41783.1, 60 sec: 41643.0, 300 sec: 41959.7). Total num frames: 1997799424. Throughput: 0: 10417.7. Samples: 249426426. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:13,977][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:15,937][626795] Updated weights for policy 0, policy_version 243882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:17,718][626795] Updated weights for policy 0, policy_version 243892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:18,976][24592] Fps is (10 sec: 42597.2, 60 sec: 41779.0, 300 sec: 41959.7). Total num frames: 1998012416. Throughput: 0: 10440.1. Samples: 249489738. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:18,979][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:19,761][626795] Updated weights for policy 0, policy_version 243902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:21,652][626795] Updated weights for policy 0, policy_version 243912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:23,584][626795] Updated weights for policy 0, policy_version 243922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:23,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.8, 300 sec: 41959.7). Total num frames: 1998225408. Throughput: 0: 10462.8. Samples: 249553566. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:23,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:25,657][626795] Updated weights for policy 0, policy_version 243932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:27,612][626795] Updated weights for policy 0, policy_version 243942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:28,975][24592] Fps is (10 sec: 40961.0, 60 sec: 41642.7, 300 sec: 41904.3). Total num frames: 1998422016. Throughput: 0: 10456.4. Samples: 249584028. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:28,979][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:29,489][626795] Updated weights for policy 0, policy_version 243952 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:31,487][626795] Updated weights for policy 0, policy_version 243962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:33,459][626795] Updated weights for policy 0, policy_version 243972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41779.3, 300 sec: 41931.9). Total num frames: 1998635008. Throughput: 0: 10455.7. Samples: 249646878. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:33,976][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:35,454][626795] Updated weights for policy 0, policy_version 243982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:37,371][626795] Updated weights for policy 0, policy_version 243992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:38,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41916.8, 300 sec: 41932.0). Total num frames: 1998848000. Throughput: 0: 10454.8. Samples: 249709872. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:38,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:39,297][626795] Updated weights for policy 0, policy_version 244002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:41,268][626795] Updated weights for policy 0, policy_version 244012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:43,245][626795] Updated weights for policy 0, policy_version 244022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:43,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41779.5, 300 sec: 41904.1). Total num frames: 1999052800. Throughput: 0: 10479.3. Samples: 249741534. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:43,976][24592] Avg episode reward: [(0, '5.002')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:45,162][626795] Updated weights for policy 0, policy_version 244032 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:47,146][626795] Updated weights for policy 0, policy_version 244042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:48,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41919.7, 300 sec: 41904.2). Total num frames: 1999265792. Throughput: 0: 10493.7. Samples: 249804306. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:48,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:49,178][626795] Updated weights for policy 0, policy_version 244052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:51,074][626795] Updated weights for policy 0, policy_version 244062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:52,995][626795] Updated weights for policy 0, policy_version 244072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:53,975][24592] Fps is (10 sec: 42599.1, 60 sec: 41915.8, 300 sec: 41904.2). Total num frames: 1999478784. Throughput: 0: 10498.8. Samples: 249867228. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:53,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:54,987][626795] Updated weights for policy 0, policy_version 244082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:56,936][626795] Updated weights for policy 0, policy_version 244092 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:48:58,811][626795] Updated weights for policy 0, policy_version 244102 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.9, 300 sec: 41904.2). Total num frames: 1999683584. Throughput: 0: 10478.4. Samples: 249897954. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:48:58,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:00,868][626795] Updated weights for policy 0, policy_version 244112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:02,815][626795] Updated weights for policy 0, policy_version 244122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41916.5, 300 sec: 41904.2). Total num frames: 1999896576. Throughput: 0: 10471.8. Samples: 249960966. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:03,978][24592] Avg episode reward: [(0, '4.934')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000244128_1999896576.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:04,135][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000242903_1989861376.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:04,848][626795] Updated weights for policy 0, policy_version 244132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:06,747][626795] Updated weights for policy 0, policy_version 244142 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:08,684][626795] Updated weights for policy 0, policy_version 244152 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:08,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41915.7, 300 sec: 41876.4). Total num frames: 2000101376. Throughput: 0: 10433.1. Samples: 250023054. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:08,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:10,695][626795] Updated weights for policy 0, policy_version 244162 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:12,676][626795] Updated weights for policy 0, policy_version 244172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:13,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41779.2, 300 sec: 41848.7). Total num frames: 2000306176. Throughput: 0: 10438.9. Samples: 250053780. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:13,979][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:14,676][626795] Updated weights for policy 0, policy_version 244182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:16,560][626795] Updated weights for policy 0, policy_version 244192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:18,490][626795] Updated weights for policy 0, policy_version 244202 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:18,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41642.9, 300 sec: 41820.9). Total num frames: 2000510976. Throughput: 0: 10442.9. Samples: 250116810. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:18,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:20,617][626795] Updated weights for policy 0, policy_version 244212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:22,469][626795] Updated weights for policy 0, policy_version 244222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41779.3, 300 sec: 41848.6). Total num frames: 2000732160. Throughput: 0: 10444.8. Samples: 250179888. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:23,978][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:24,384][626795] Updated weights for policy 0, policy_version 244232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:26,357][626795] Updated weights for policy 0, policy_version 244242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:28,317][626795] Updated weights for policy 0, policy_version 244252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:28,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.8, 300 sec: 41848.7). Total num frames: 2000936960. Throughput: 0: 10440.4. Samples: 250211352. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:28,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:30,205][626795] Updated weights for policy 0, policy_version 244262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:32,156][626795] Updated weights for policy 0, policy_version 244272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2001141760. Throughput: 0: 10438.8. Samples: 250274052. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:33,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:34,149][626795] Updated weights for policy 0, policy_version 244282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:36,149][626795] Updated weights for policy 0, policy_version 244292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:38,134][626795] Updated weights for policy 0, policy_version 244302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41779.1, 300 sec: 41820.9). Total num frames: 2001354752. Throughput: 0: 10442.8. Samples: 250337154. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:38,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:40,047][626795] Updated weights for policy 0, policy_version 244312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:42,056][626795] Updated weights for policy 0, policy_version 244322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:43,952][626795] Updated weights for policy 0, policy_version 244332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:43,976][24592] Fps is (10 sec: 42596.5, 60 sec: 41915.5, 300 sec: 41848.6). Total num frames: 2001567744. Throughput: 0: 10449.1. Samples: 250368168. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:43,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:45,895][626795] Updated weights for policy 0, policy_version 244342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:47,903][626795] Updated weights for policy 0, policy_version 244352 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.1, 300 sec: 41820.8). Total num frames: 2001772544. Throughput: 0: 10450.2. Samples: 250431228. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:48,976][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:49,944][626795] Updated weights for policy 0, policy_version 244362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:51,795][626795] Updated weights for policy 0, policy_version 244372 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:53,724][626795] Updated weights for policy 0, policy_version 244382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:53,977][24592] Fps is (10 sec: 41774.6, 60 sec: 41778.1, 300 sec: 41820.6). Total num frames: 2001985536. Throughput: 0: 10473.1. Samples: 250494360. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:53,978][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:55,671][626795] Updated weights for policy 0, policy_version 244392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:57,657][626795] Updated weights for policy 0, policy_version 244402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2002190336. Throughput: 0: 10476.8. Samples: 250525236. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:49:58,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:49:59,519][626795] Updated weights for policy 0, policy_version 244412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:01,535][626795] Updated weights for policy 0, policy_version 244422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:03,472][626795] Updated weights for policy 0, policy_version 244432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:03,975][24592] Fps is (10 sec: 42605.0, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2002411520. Throughput: 0: 10499.7. Samples: 250589298. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:03,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:05,421][626795] Updated weights for policy 0, policy_version 244442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:07,407][626795] Updated weights for policy 0, policy_version 244452 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41915.8, 300 sec: 41820.9). Total num frames: 2002616320. Throughput: 0: 10483.7. Samples: 250651656. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:08,977][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:09,452][626795] Updated weights for policy 0, policy_version 244462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:11,336][626795] Updated weights for policy 0, policy_version 244472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:13,208][626795] Updated weights for policy 0, policy_version 244482 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:13,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.3, 300 sec: 41820.9). Total num frames: 2002829312. Throughput: 0: 10480.7. Samples: 250682982. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:13,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:15,239][626795] Updated weights for policy 0, policy_version 244492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:17,135][626795] Updated weights for policy 0, policy_version 244502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:18,976][24592] Fps is (10 sec: 41777.5, 60 sec: 42052.0, 300 sec: 41820.8). Total num frames: 2003034112. Throughput: 0: 10490.3. Samples: 250746120. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:18,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:19,048][626795] Updated weights for policy 0, policy_version 244512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:20,965][626795] Updated weights for policy 0, policy_version 244522 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:22,981][626795] Updated weights for policy 0, policy_version 244532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:23,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41915.5, 300 sec: 41820.8). Total num frames: 2003247104. Throughput: 0: 10498.6. Samples: 250809594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:23,979][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:24,996][626795] Updated weights for policy 0, policy_version 244542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:26,840][626795] Updated weights for policy 0, policy_version 244552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:28,817][626795] Updated weights for policy 0, policy_version 244562 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:28,976][24592] Fps is (10 sec: 42599.0, 60 sec: 42052.1, 300 sec: 41848.6). Total num frames: 2003460096. Throughput: 0: 10509.8. Samples: 250841106. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:28,976][24592] Avg episode reward: [(0, '5.028')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:30,682][626795] Updated weights for policy 0, policy_version 244572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:32,648][626795] Updated weights for policy 0, policy_version 244582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:33,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42188.8, 300 sec: 41848.6). Total num frames: 2003673088. Throughput: 0: 10526.5. Samples: 250904922. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:33,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:34,630][626795] Updated weights for policy 0, policy_version 244592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:36,575][626795] Updated weights for policy 0, policy_version 244602 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:38,585][626795] Updated weights for policy 0, policy_version 244612 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:38,976][24592] Fps is (10 sec: 41779.7, 60 sec: 42052.2, 300 sec: 41821.1). Total num frames: 2003877888. Throughput: 0: 10514.9. Samples: 250967514. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:38,976][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:40,400][626795] Updated weights for policy 0, policy_version 244622 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:42,468][626795] Updated weights for policy 0, policy_version 244632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:43,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41915.7, 300 sec: 41820.9). Total num frames: 2004082688. Throughput: 0: 10506.0. Samples: 250998012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:43,978][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:44,466][626795] Updated weights for policy 0, policy_version 244642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:46,378][626795] Updated weights for policy 0, policy_version 244652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:48,331][626795] Updated weights for policy 0, policy_version 244662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:48,975][24592] Fps is (10 sec: 41779.9, 60 sec: 42052.3, 300 sec: 41820.9). Total num frames: 2004295680. Throughput: 0: 10491.1. Samples: 251061396. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:48,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:50,398][626795] Updated weights for policy 0, policy_version 244672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:52,254][626795] Updated weights for policy 0, policy_version 244682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:53,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41916.8, 300 sec: 41820.9). Total num frames: 2004500480. Throughput: 0: 10496.9. Samples: 251124018. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:53,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:54,159][626795] Updated weights for policy 0, policy_version 244692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:56,209][626795] Updated weights for policy 0, policy_version 244702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:50:58,178][626795] Updated weights for policy 0, policy_version 244712 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:58,976][24592] Fps is (10 sec: 41776.0, 60 sec: 42051.7, 300 sec: 41820.7). Total num frames: 2004713472. Throughput: 0: 10492.6. Samples: 251155158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:50:58,979][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:00,155][626795] Updated weights for policy 0, policy_version 244722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:02,058][626795] Updated weights for policy 0, policy_version 244732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:03,942][626795] Updated weights for policy 0, policy_version 244742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:03,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41915.6, 300 sec: 41848.8). Total num frames: 2004926464. Throughput: 0: 10489.4. Samples: 251218140. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:03,978][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000244742_2004926464.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000243515_1994874880.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:05,915][626795] Updated weights for policy 0, policy_version 244752 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:07,893][626795] Updated weights for policy 0, policy_version 244762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:08,976][24592] Fps is (10 sec: 41780.7, 60 sec: 41915.5, 300 sec: 41820.8). Total num frames: 2005131264. Throughput: 0: 10480.1. Samples: 251281200. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:08,976][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:09,878][626795] Updated weights for policy 0, policy_version 244772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:11,814][626795] Updated weights for policy 0, policy_version 244782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:13,778][626795] Updated weights for policy 0, policy_version 244792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:13,976][24592] Fps is (10 sec: 41775.9, 60 sec: 41915.1, 300 sec: 41848.5). Total num frames: 2005344256. Throughput: 0: 10468.8. Samples: 251312208. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:13,978][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:15,689][626795] Updated weights for policy 0, policy_version 244802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:17,627][626795] Updated weights for policy 0, policy_version 244812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:18,977][24592] Fps is (10 sec: 41775.1, 60 sec: 41915.1, 300 sec: 41820.7). Total num frames: 2005549056. Throughput: 0: 10464.2. Samples: 251375826. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:18,978][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:19,626][626795] Updated weights for policy 0, policy_version 244822 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:21,591][626795] Updated weights for policy 0, policy_version 244832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:23,368][626795] Updated weights for policy 0, policy_version 244842 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:23,975][24592] Fps is (10 sec: 42602.5, 60 sec: 42052.5, 300 sec: 41876.4). Total num frames: 2005770240. Throughput: 0: 10485.4. Samples: 251439354. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:23,977][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:25,464][626795] Updated weights for policy 0, policy_version 244852 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:27,320][626795] Updated weights for policy 0, policy_version 244862 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:28,977][24592] Fps is (10 sec: 42599.2, 60 sec: 41915.1, 300 sec: 41848.6). Total num frames: 2005975040. Throughput: 0: 10498.9. Samples: 251470470. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:28,978][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:29,340][626795] Updated weights for policy 0, policy_version 244872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:31,314][626795] Updated weights for policy 0, policy_version 244882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:33,321][626795] Updated weights for policy 0, policy_version 244892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:33,976][24592] Fps is (10 sec: 40958.1, 60 sec: 41778.9, 300 sec: 41820.8). Total num frames: 2006179840. Throughput: 0: 10485.8. Samples: 251533260. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:33,977][24592] Avg episode reward: [(0, '4.893')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:35,236][626795] Updated weights for policy 0, policy_version 244902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:37,217][626795] Updated weights for policy 0, policy_version 244912 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:38,975][24592] Fps is (10 sec: 41784.3, 60 sec: 41915.9, 300 sec: 41820.9). Total num frames: 2006392832. Throughput: 0: 10485.5. Samples: 251595864. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:38,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:39,174][626795] Updated weights for policy 0, policy_version 244922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:41,146][626795] Updated weights for policy 0, policy_version 244932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:43,047][626795] Updated weights for policy 0, policy_version 244942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:43,975][24592] Fps is (10 sec: 41781.2, 60 sec: 41916.1, 300 sec: 41820.9). Total num frames: 2006597632. Throughput: 0: 10490.3. Samples: 251627214. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:43,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:44,994][626795] Updated weights for policy 0, policy_version 244952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:46,908][626795] Updated weights for policy 0, policy_version 244962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:48,889][626795] Updated weights for policy 0, policy_version 244972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2006810624. Throughput: 0: 10489.1. Samples: 251690148. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:48,977][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:50,886][626795] Updated weights for policy 0, policy_version 244982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:52,765][626795] Updated weights for policy 0, policy_version 244992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:53,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.2, 300 sec: 41848.7). Total num frames: 2007023616. Throughput: 0: 10504.8. Samples: 251753910. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:53,977][24592] Avg episode reward: [(0, '4.919')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:54,796][626795] Updated weights for policy 0, policy_version 245002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:56,664][626795] Updated weights for policy 0, policy_version 245012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:51:58,615][626795] Updated weights for policy 0, policy_version 245022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:58,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.8, 300 sec: 41904.2). Total num frames: 2007236608. Throughput: 0: 10506.5. Samples: 251784990. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:51:58,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:00,470][626795] Updated weights for policy 0, policy_version 245032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:02,525][626795] Updated weights for policy 0, policy_version 245042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:03,976][24592] Fps is (10 sec: 41774.7, 60 sec: 41915.1, 300 sec: 41876.3). Total num frames: 2007441408. Throughput: 0: 10501.3. Samples: 251848380. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:03,978][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:04,510][626795] Updated weights for policy 0, policy_version 245052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:06,375][626795] Updated weights for policy 0, policy_version 245062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:08,305][626795] Updated weights for policy 0, policy_version 245072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.6, 300 sec: 41876.5). Total num frames: 2007654400. Throughput: 0: 10490.5. Samples: 251911428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:08,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:10,331][626795] Updated weights for policy 0, policy_version 245082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:12,282][626795] Updated weights for policy 0, policy_version 245092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:13,975][24592] Fps is (10 sec: 42603.1, 60 sec: 42053.0, 300 sec: 41904.2). Total num frames: 2007867392. Throughput: 0: 10505.7. Samples: 251943216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:13,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:14,214][626795] Updated weights for policy 0, policy_version 245102 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:16,149][626795] Updated weights for policy 0, policy_version 245112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:18,100][626795] Updated weights for policy 0, policy_version 245122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:18,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42053.1, 300 sec: 41904.2). Total num frames: 2008072192. Throughput: 0: 10522.2. Samples: 252006756. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:18,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:20,034][626795] Updated weights for policy 0, policy_version 245132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:21,855][626795] Updated weights for policy 0, policy_version 245142 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:23,851][626795] Updated weights for policy 0, policy_version 245152 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2008285184. Throughput: 0: 10523.6. Samples: 252069426. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:23,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:25,791][626795] Updated weights for policy 0, policy_version 245162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:27,807][626795] Updated weights for policy 0, policy_version 245172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:28,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42053.0, 300 sec: 41931.9). Total num frames: 2008498176. Throughput: 0: 10532.2. Samples: 252101166. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:28,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:29,732][626795] Updated weights for policy 0, policy_version 245182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:31,700][626795] Updated weights for policy 0, policy_version 245192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:33,533][626795] Updated weights for policy 0, policy_version 245202 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42052.6, 300 sec: 41932.1). Total num frames: 2008702976. Throughput: 0: 10549.3. Samples: 252164868. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:33,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:35,548][626795] Updated weights for policy 0, policy_version 245212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:37,488][626795] Updated weights for policy 0, policy_version 245222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:38,987][24592] Fps is (10 sec: 41731.3, 60 sec: 42044.1, 300 sec: 41930.4). Total num frames: 2008915968. Throughput: 0: 10532.8. Samples: 252228006. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:38,988][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:39,446][626795] Updated weights for policy 0, policy_version 245232 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:41,365][626795] Updated weights for policy 0, policy_version 245242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:43,316][626795] Updated weights for policy 0, policy_version 245252 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42188.8, 300 sec: 41960.5). Total num frames: 2009128960. Throughput: 0: 10543.5. Samples: 252259446. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:43,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:45,231][626795] Updated weights for policy 0, policy_version 245262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:47,167][626795] Updated weights for policy 0, policy_version 245272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:48,976][24592] Fps is (10 sec: 42647.3, 60 sec: 42188.7, 300 sec: 41959.7). Total num frames: 2009341952. Throughput: 0: 10533.3. Samples: 252322368. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:48,976][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:49,217][626795] Updated weights for policy 0, policy_version 245282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:51,191][626795] Updated weights for policy 0, policy_version 245292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:53,093][626795] Updated weights for policy 0, policy_version 245302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:53,975][24592] Fps is (10 sec: 41778.9, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 2009546752. Throughput: 0: 10526.9. Samples: 252385140. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:53,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:55,114][626795] Updated weights for policy 0, policy_version 245312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:56,950][626795] Updated weights for policy 0, policy_version 245322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:52:58,952][626795] Updated weights for policy 0, policy_version 245332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:58,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42052.2, 300 sec: 41959.8). Total num frames: 2009759744. Throughput: 0: 10517.5. Samples: 252416502. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:52:58,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:00,907][626795] Updated weights for policy 0, policy_version 245342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:02,830][626795] Updated weights for policy 0, policy_version 245352 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:03,976][24592] Fps is (10 sec: 42596.1, 60 sec: 42189.2, 300 sec: 41987.4). Total num frames: 2009972736. Throughput: 0: 10511.6. Samples: 252479784. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:03,988][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:03,992][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000245358_2009972736.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:04,123][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000244128_1999896576.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:04,785][626795] Updated weights for policy 0, policy_version 245362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:06,864][626795] Updated weights for policy 0, policy_version 245372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:08,664][626795] Updated weights for policy 0, policy_version 245382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:08,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 2010169344. Throughput: 0: 10500.8. Samples: 252541962. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:08,976][24592] Avg episode reward: [(0, '4.945')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:10,774][626795] Updated weights for policy 0, policy_version 245392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:12,641][626795] Updated weights for policy 0, policy_version 245402 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:13,975][24592] Fps is (10 sec: 40962.4, 60 sec: 41915.7, 300 sec: 41932.0). Total num frames: 2010382336. Throughput: 0: 10489.2. Samples: 252573180. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:13,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:14,638][626795] Updated weights for policy 0, policy_version 245412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:16,567][626795] Updated weights for policy 0, policy_version 245422 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:18,519][626795] Updated weights for policy 0, policy_version 245432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42052.4, 300 sec: 41931.9). Total num frames: 2010595328. Throughput: 0: 10488.9. Samples: 252636870. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:18,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:20,486][626795] Updated weights for policy 0, policy_version 245442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:22,439][626795] Updated weights for policy 0, policy_version 245452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:23,976][24592] Fps is (10 sec: 41774.9, 60 sec: 41915.0, 300 sec: 41959.6). Total num frames: 2010800128. Throughput: 0: 10477.0. Samples: 252699360. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:23,978][24592] Avg episode reward: [(0, '4.942')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:24,472][626795] Updated weights for policy 0, policy_version 245462 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:26,415][626795] Updated weights for policy 0, policy_version 245472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:28,330][626795] Updated weights for policy 0, policy_version 245482 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:28,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2011013120. Throughput: 0: 10453.7. Samples: 252729864. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:28,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:30,231][626795] Updated weights for policy 0, policy_version 245492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:32,244][626795] Updated weights for policy 0, policy_version 245502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:33,975][24592] Fps is (10 sec: 42602.8, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 2011226112. Throughput: 0: 10473.5. Samples: 252793674. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:33,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:34,238][626795] Updated weights for policy 0, policy_version 245512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:36,043][626795] Updated weights for policy 0, policy_version 245522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:38,146][626795] Updated weights for policy 0, policy_version 245532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:38,975][24592] Fps is (10 sec: 42599.3, 60 sec: 42060.4, 300 sec: 41987.5). Total num frames: 2011439104. Throughput: 0: 10469.9. Samples: 252856284. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:38,977][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:39,950][626795] Updated weights for policy 0, policy_version 245542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:41,912][626795] Updated weights for policy 0, policy_version 245552 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:43,845][626795] Updated weights for policy 0, policy_version 245562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2011643904. Throughput: 0: 10476.3. Samples: 252887934. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:43,977][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:45,870][626795] Updated weights for policy 0, policy_version 245572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:47,793][626795] Updated weights for policy 0, policy_version 245582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:48,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.8, 300 sec: 41959.7). Total num frames: 2011856896. Throughput: 0: 10487.6. Samples: 252951720. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:48,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:49,776][626795] Updated weights for policy 0, policy_version 245592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:51,602][626795] Updated weights for policy 0, policy_version 245602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:53,671][626795] Updated weights for policy 0, policy_version 245612 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:53,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2012061696. Throughput: 0: 10503.3. Samples: 253014612. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:53,976][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:55,650][626795] Updated weights for policy 0, policy_version 245622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:57,592][626795] Updated weights for policy 0, policy_version 245632 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2012274688. Throughput: 0: 10495.6. Samples: 253045482. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:53:58,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:53:59,526][626795] Updated weights for policy 0, policy_version 245642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:01,597][626795] Updated weights for policy 0, policy_version 245652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:03,416][626795] Updated weights for policy 0, policy_version 245662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:03,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41779.4, 300 sec: 41959.7). Total num frames: 2012479488. Throughput: 0: 10470.1. Samples: 253108026. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:03,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:05,371][626795] Updated weights for policy 0, policy_version 245672 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:07,376][626795] Updated weights for policy 0, policy_version 245682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 2012692480. Throughput: 0: 10468.6. Samples: 253170438. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:08,977][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:09,389][626795] Updated weights for policy 0, policy_version 245692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:11,368][626795] Updated weights for policy 0, policy_version 245702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:13,280][626795] Updated weights for policy 0, policy_version 245712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:13,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2012897280. Throughput: 0: 10481.2. Samples: 253201518. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:13,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:15,296][626795] Updated weights for policy 0, policy_version 245722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:17,227][626795] Updated weights for policy 0, policy_version 245732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:18,976][24592] Fps is (10 sec: 40958.7, 60 sec: 41779.0, 300 sec: 41931.9). Total num frames: 2013102080. Throughput: 0: 10462.7. Samples: 253264500. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:18,977][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:19,211][626795] Updated weights for policy 0, policy_version 245742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:21,146][626795] Updated weights for policy 0, policy_version 245752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:23,021][626795] Updated weights for policy 0, policy_version 245762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41916.4, 300 sec: 41959.7). Total num frames: 2013315072. Throughput: 0: 10470.0. Samples: 253327434. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:23,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:25,132][626795] Updated weights for policy 0, policy_version 245772 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:27,065][626795] Updated weights for policy 0, policy_version 245782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:28,935][626795] Updated weights for policy 0, policy_version 245792 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:28,976][24592] Fps is (10 sec: 42599.3, 60 sec: 41915.8, 300 sec: 41987.5). Total num frames: 2013528064. Throughput: 0: 10443.3. Samples: 253357884. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:28,978][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:31,018][626795] Updated weights for policy 0, policy_version 245802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:32,954][626795] Updated weights for policy 0, policy_version 245812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:33,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.1, 300 sec: 41959.7). Total num frames: 2013732864. Throughput: 0: 10416.5. Samples: 253420464. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:33,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:34,953][626795] Updated weights for policy 0, policy_version 245822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:36,784][626795] Updated weights for policy 0, policy_version 245832 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:38,902][626795] Updated weights for policy 0, policy_version 245842 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:38,976][24592] Fps is (10 sec: 40957.0, 60 sec: 41642.1, 300 sec: 41931.9). Total num frames: 2013937664. Throughput: 0: 10413.0. Samples: 253483206. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:38,978][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:40,749][626795] Updated weights for policy 0, policy_version 245852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:42,781][626795] Updated weights for policy 0, policy_version 245862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:43,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41778.9, 300 sec: 41959.6). Total num frames: 2014150656. Throughput: 0: 10425.5. Samples: 253514634. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:43,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:44,685][626795] Updated weights for policy 0, policy_version 245872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:46,657][626795] Updated weights for policy 0, policy_version 245882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:48,494][626795] Updated weights for policy 0, policy_version 245892 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:48,975][24592] Fps is (10 sec: 41782.6, 60 sec: 41642.7, 300 sec: 41932.2). Total num frames: 2014355456. Throughput: 0: 10446.2. Samples: 253578102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:48,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:50,517][626795] Updated weights for policy 0, policy_version 245902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:52,456][626795] Updated weights for policy 0, policy_version 245912 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:53,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41779.3, 300 sec: 41959.7). Total num frames: 2014568448. Throughput: 0: 10451.2. Samples: 253640742. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:53,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:54,485][626795] Updated weights for policy 0, policy_version 245922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:56,415][626795] Updated weights for policy 0, policy_version 245932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:54:58,357][626795] Updated weights for policy 0, policy_version 245942 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.7, 300 sec: 41904.2). Total num frames: 2014773248. Throughput: 0: 10446.8. Samples: 253671624. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:54:58,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:00,383][626795] Updated weights for policy 0, policy_version 245952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:02,294][626795] Updated weights for policy 0, policy_version 245962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.3, 300 sec: 41931.9). Total num frames: 2014986240. Throughput: 0: 10449.9. Samples: 253734744. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:03,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000245970_2014986240.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:04,115][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000244742_2004926464.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:04,297][626795] Updated weights for policy 0, policy_version 245972 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:06,279][626795] Updated weights for policy 0, policy_version 245982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:08,272][626795] Updated weights for policy 0, policy_version 245992 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:08,977][24592] Fps is (10 sec: 41772.3, 60 sec: 41641.5, 300 sec: 41903.9). Total num frames: 2015191040. Throughput: 0: 10425.4. Samples: 253796592. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:08,978][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:10,262][626795] Updated weights for policy 0, policy_version 246002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:12,179][626795] Updated weights for policy 0, policy_version 246012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:13,976][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.1, 300 sec: 41932.0). Total num frames: 2015404032. Throughput: 0: 10442.7. Samples: 253827804. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:13,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:14,188][626795] Updated weights for policy 0, policy_version 246022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:16,098][626795] Updated weights for policy 0, policy_version 246032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:18,116][626795] Updated weights for policy 0, policy_version 246042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:18,976][24592] Fps is (10 sec: 42603.9, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 2015617024. Throughput: 0: 10442.4. Samples: 253890372. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:18,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:20,103][626795] Updated weights for policy 0, policy_version 246052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:21,939][626795] Updated weights for policy 0, policy_version 246062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:23,811][626795] Updated weights for policy 0, policy_version 246072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:23,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41915.8, 300 sec: 41932.0). Total num frames: 2015830016. Throughput: 0: 10465.9. Samples: 253954164. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:23,976][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:25,835][626795] Updated weights for policy 0, policy_version 246082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:27,710][626795] Updated weights for policy 0, policy_version 246092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,141][626772] Signal inference workers to stop experience collection... (3400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,141][626772] Signal inference workers to resume experience collection... (3400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,157][626795] InferenceWorker_p0-w0: stopping experience collection (3400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,157][626795] InferenceWorker_p0-w0: resuming experience collection (3400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41779.3, 300 sec: 41904.2). Total num frames: 2016034816. Throughput: 0: 10443.8. Samples: 253984602. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:28,976][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:29,773][626795] Updated weights for policy 0, policy_version 246102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:31,764][626795] Updated weights for policy 0, policy_version 246112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:33,624][626795] Updated weights for policy 0, policy_version 246122 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41779.3, 300 sec: 41904.2). Total num frames: 2016239616. Throughput: 0: 10433.1. Samples: 254047590. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:33,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:35,646][626795] Updated weights for policy 0, policy_version 246132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:37,610][626795] Updated weights for policy 0, policy_version 246142 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41916.3, 300 sec: 41932.0). Total num frames: 2016452608. Throughput: 0: 10433.9. Samples: 254110266. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:38,976][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:39,564][626795] Updated weights for policy 0, policy_version 246152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:41,596][626795] Updated weights for policy 0, policy_version 246162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:43,456][626795] Updated weights for policy 0, policy_version 246172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.5, 300 sec: 41904.2). Total num frames: 2016657408. Throughput: 0: 10442.5. Samples: 254141538. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:43,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:45,390][626795] Updated weights for policy 0, policy_version 246182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:47,399][626795] Updated weights for policy 0, policy_version 246192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:48,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 2016870400. Throughput: 0: 10455.7. Samples: 254205252. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:48,978][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:49,346][626795] Updated weights for policy 0, policy_version 246202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:51,317][626795] Updated weights for policy 0, policy_version 246212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:53,263][626795] Updated weights for policy 0, policy_version 246222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:53,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.2, 300 sec: 41904.3). Total num frames: 2017075200. Throughput: 0: 10463.9. Samples: 254267448. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:53,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:55,336][626795] Updated weights for policy 0, policy_version 246232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:57,088][626795] Updated weights for policy 0, policy_version 246242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2017288192. Throughput: 0: 10467.5. Samples: 254298840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:55:58,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:55:59,179][626795] Updated weights for policy 0, policy_version 246252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:01,123][626795] Updated weights for policy 0, policy_version 246262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:03,143][626795] Updated weights for policy 0, policy_version 246272 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:03,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41779.1, 300 sec: 41904.2). Total num frames: 2017492992. Throughput: 0: 10446.7. Samples: 254360472. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:03,977][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:05,335][626795] Updated weights for policy 0, policy_version 246282 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:07,283][626795] Updated weights for policy 0, policy_version 246292 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:08,975][24592] Fps is (10 sec: 38502.2, 60 sec: 41370.7, 300 sec: 41793.2). Total num frames: 2017673216. Throughput: 0: 10337.2. Samples: 254419338. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:08,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:09,892][626795] Updated weights for policy 0, policy_version 246302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:11,969][626795] Updated weights for policy 0, policy_version 246312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:13,824][626795] Updated weights for policy 0, policy_version 246322 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:13,975][24592] Fps is (10 sec: 37684.1, 60 sec: 41096.6, 300 sec: 41765.5). Total num frames: 2017869824. Throughput: 0: 10226.8. Samples: 254444808. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:13,978][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:15,817][626795] Updated weights for policy 0, policy_version 246332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:17,658][626795] Updated weights for policy 0, policy_version 246342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:18,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41096.8, 300 sec: 41737.6). Total num frames: 2018082816. Throughput: 0: 10222.8. Samples: 254507616. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:18,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:19,769][626795] Updated weights for policy 0, policy_version 246352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:21,655][626795] Updated weights for policy 0, policy_version 246362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:23,552][626795] Updated weights for policy 0, policy_version 246372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:23,976][24592] Fps is (10 sec: 42596.7, 60 sec: 41096.3, 300 sec: 41765.4). Total num frames: 2018295808. Throughput: 0: 10256.3. Samples: 254571804. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:23,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:25,409][626795] Updated weights for policy 0, policy_version 246382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:27,458][626795] Updated weights for policy 0, policy_version 246392 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:28,976][24592] Fps is (10 sec: 42594.2, 60 sec: 41232.4, 300 sec: 41793.0). Total num frames: 2018508800. Throughput: 0: 10258.2. Samples: 254603166. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:28,977][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:29,442][626795] Updated weights for policy 0, policy_version 246402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:31,361][626795] Updated weights for policy 0, policy_version 246412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:33,281][626795] Updated weights for policy 0, policy_version 246422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:33,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41233.0, 300 sec: 41765.3). Total num frames: 2018713600. Throughput: 0: 10247.2. Samples: 254666376. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:33,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:35,242][626795] Updated weights for policy 0, policy_version 246432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:37,080][626795] Updated weights for policy 0, policy_version 246442 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:38,975][24592] Fps is (10 sec: 41783.3, 60 sec: 41233.1, 300 sec: 41793.1). Total num frames: 2018926592. Throughput: 0: 10270.9. Samples: 254729640. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:38,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:39,159][626795] Updated weights for policy 0, policy_version 246452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:41,072][626795] Updated weights for policy 0, policy_version 246462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:43,052][626795] Updated weights for policy 0, policy_version 246472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:43,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.0, 300 sec: 41765.3). Total num frames: 2019131392. Throughput: 0: 10264.7. Samples: 254760750. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:43,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:45,023][626795] Updated weights for policy 0, policy_version 246482 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:46,971][626795] Updated weights for policy 0, policy_version 246492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:48,901][626795] Updated weights for policy 0, policy_version 246502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.1, 300 sec: 41765.3). Total num frames: 2019344384. Throughput: 0: 10283.3. Samples: 254823216. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:48,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:50,829][626795] Updated weights for policy 0, policy_version 246512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:52,837][626795] Updated weights for policy 0, policy_version 246522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:53,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41369.6, 300 sec: 41765.3). Total num frames: 2019557376. Throughput: 0: 10379.8. Samples: 254886426. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:53,977][24592] Avg episode reward: [(0, '4.956')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:54,754][626795] Updated weights for policy 0, policy_version 246532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:56,769][626795] Updated weights for policy 0, policy_version 246542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:56:58,617][626795] Updated weights for policy 0, policy_version 246552 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:58,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.1, 300 sec: 41765.5). Total num frames: 2019762176. Throughput: 0: 10502.3. Samples: 254917410. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:56:58,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:00,692][626795] Updated weights for policy 0, policy_version 246562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:02,655][626795] Updated weights for policy 0, policy_version 246572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:03,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.8, 300 sec: 41765.3). Total num frames: 2019975168. Throughput: 0: 10499.2. Samples: 254980080. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:03,976][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000246579_2019975168.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:04,093][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000245358_2009972736.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:04,705][626795] Updated weights for policy 0, policy_version 246582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:06,677][626795] Updated weights for policy 0, policy_version 246592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:08,642][626795] Updated weights for policy 0, policy_version 246602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:08,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2020171776. Throughput: 0: 10437.7. Samples: 255041496. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:08,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:10,710][626795] Updated weights for policy 0, policy_version 246612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:12,611][626795] Updated weights for policy 0, policy_version 246622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:13,976][24592] Fps is (10 sec: 40140.3, 60 sec: 41779.1, 300 sec: 41709.8). Total num frames: 2020376576. Throughput: 0: 10417.7. Samples: 255071952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:13,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:14,647][626795] Updated weights for policy 0, policy_version 246632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:16,552][626795] Updated weights for policy 0, policy_version 246642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:18,564][626795] Updated weights for policy 0, policy_version 246652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:18,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41779.1, 300 sec: 41709.8). Total num frames: 2020589568. Throughput: 0: 10413.8. Samples: 255135000. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:18,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:20,435][626795] Updated weights for policy 0, policy_version 246662 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:22,414][626795] Updated weights for policy 0, policy_version 246672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:23,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41779.5, 300 sec: 41709.8). Total num frames: 2020802560. Throughput: 0: 10406.1. Samples: 255197916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:23,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:24,429][626795] Updated weights for policy 0, policy_version 246682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:26,400][626795] Updated weights for policy 0, policy_version 246692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:28,262][626795] Updated weights for policy 0, policy_version 246702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:28,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41643.3, 300 sec: 41709.8). Total num frames: 2021007360. Throughput: 0: 10416.7. Samples: 255229500. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:28,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:30,209][626795] Updated weights for policy 0, policy_version 246712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:32,161][626795] Updated weights for policy 0, policy_version 246722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:33,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41779.1, 300 sec: 41711.4). Total num frames: 2021220352. Throughput: 0: 10421.3. Samples: 255292176. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:33,979][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:34,279][626795] Updated weights for policy 0, policy_version 246732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:36,092][626795] Updated weights for policy 0, policy_version 246742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:38,105][626795] Updated weights for policy 0, policy_version 246752 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:38,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41642.5, 300 sec: 41682.0). Total num frames: 2021425152. Throughput: 0: 10413.1. Samples: 255355020. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:38,978][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:40,140][626795] Updated weights for policy 0, policy_version 246762 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:42,115][626795] Updated weights for policy 0, policy_version 246772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:43,964][626795] Updated weights for policy 0, policy_version 246782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:43,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41779.3, 300 sec: 41682.0). Total num frames: 2021638144. Throughput: 0: 10404.3. Samples: 255385602. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:43,978][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:46,009][626795] Updated weights for policy 0, policy_version 246792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:47,985][626795] Updated weights for policy 0, policy_version 246802 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:48,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41642.7, 300 sec: 41682.0). Total num frames: 2021842944. Throughput: 0: 10408.3. Samples: 255448452. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:48,978][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:49,842][626795] Updated weights for policy 0, policy_version 246812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:51,786][626795] Updated weights for policy 0, policy_version 246822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:53,724][626795] Updated weights for policy 0, policy_version 246832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:53,981][24592] Fps is (10 sec: 41756.4, 60 sec: 41638.8, 300 sec: 41681.2). Total num frames: 2022055936. Throughput: 0: 10452.9. Samples: 255511932. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:53,982][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:55,825][626795] Updated weights for policy 0, policy_version 246842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:57,727][626795] Updated weights for policy 0, policy_version 246852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.7, 300 sec: 41654.3). Total num frames: 2022260736. Throughput: 0: 10460.4. Samples: 255542670. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:57:58,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:57:59,621][626795] Updated weights for policy 0, policy_version 246862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:01,595][626795] Updated weights for policy 0, policy_version 246872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:03,487][626795] Updated weights for policy 0, policy_version 246882 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:03,975][24592] Fps is (10 sec: 41802.1, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2022473728. Throughput: 0: 10460.8. Samples: 255605736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:03,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:05,532][626795] Updated weights for policy 0, policy_version 246892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:07,477][626795] Updated weights for policy 0, policy_version 246902 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:08,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41779.1, 300 sec: 41682.0). Total num frames: 2022678528. Throughput: 0: 10453.7. Samples: 255668334. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:08,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:09,393][626795] Updated weights for policy 0, policy_version 246912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:11,451][626795] Updated weights for policy 0, policy_version 246922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:13,319][626795] Updated weights for policy 0, policy_version 246932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:13,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.8, 300 sec: 41682.0). Total num frames: 2022891520. Throughput: 0: 10445.1. Samples: 255699528. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:13,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:15,406][626795] Updated weights for policy 0, policy_version 246942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:17,364][626795] Updated weights for policy 0, policy_version 246952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:18,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41778.9, 300 sec: 41682.1). Total num frames: 2023096320. Throughput: 0: 10438.7. Samples: 255761922. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:18,977][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:19,285][626795] Updated weights for policy 0, policy_version 246962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:21,180][626795] Updated weights for policy 0, policy_version 246972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:23,195][626795] Updated weights for policy 0, policy_version 246982 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:23,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2023309312. Throughput: 0: 10449.8. Samples: 255825258. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:23,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:25,112][626795] Updated weights for policy 0, policy_version 246992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:27,108][626795] Updated weights for policy 0, policy_version 247002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:28,975][24592] Fps is (10 sec: 41781.8, 60 sec: 41779.3, 300 sec: 41654.2). Total num frames: 2023514112. Throughput: 0: 10467.1. Samples: 255856620. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:28,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:29,015][626795] Updated weights for policy 0, policy_version 247012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:31,005][626795] Updated weights for policy 0, policy_version 247022 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:32,947][626795] Updated weights for policy 0, policy_version 247032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.3, 300 sec: 41654.2). Total num frames: 2023727104. Throughput: 0: 10466.5. Samples: 255919446. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:33,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:34,975][626795] Updated weights for policy 0, policy_version 247042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:36,898][626795] Updated weights for policy 0, policy_version 247052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:38,822][626795] Updated weights for policy 0, policy_version 247062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.4, 300 sec: 41654.2). Total num frames: 2023931904. Throughput: 0: 10454.1. Samples: 255982308. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:38,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:40,648][626795] Updated weights for policy 0, policy_version 247072 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:42,723][626795] Updated weights for policy 0, policy_version 247082 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41654.2). Total num frames: 2024144896. Throughput: 0: 10458.3. Samples: 256013292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:43,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:44,791][626795] Updated weights for policy 0, policy_version 247092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:46,728][626795] Updated weights for policy 0, policy_version 247102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:48,556][626795] Updated weights for policy 0, policy_version 247112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:48,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.1, 300 sec: 41654.2). Total num frames: 2024349696. Throughput: 0: 10454.6. Samples: 256076196. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:48,979][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:50,629][626795] Updated weights for policy 0, policy_version 247122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:52,521][626795] Updated weights for policy 0, policy_version 247132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:53,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41783.0, 300 sec: 41654.2). Total num frames: 2024562688. Throughput: 0: 10465.2. Samples: 256139268. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:53,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:54,435][626795] Updated weights for policy 0, policy_version 247142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:56,487][626795] Updated weights for policy 0, policy_version 247152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:58:58,369][626795] Updated weights for policy 0, policy_version 247162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:58,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2024767488. Throughput: 0: 10463.2. Samples: 256170372. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:58:58,976][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:00,382][626795] Updated weights for policy 0, policy_version 247172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:02,365][626795] Updated weights for policy 0, policy_version 247182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:03,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41779.0, 300 sec: 41654.2). Total num frames: 2024980480. Throughput: 0: 10474.0. Samples: 256233246. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:03,976][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:03,986][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000247191_2024988672.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:04,074][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000245970_2014986240.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:04,273][626795] Updated weights for policy 0, policy_version 247192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:06,239][626795] Updated weights for policy 0, policy_version 247202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:08,172][626795] Updated weights for policy 0, policy_version 247212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:08,976][24592] Fps is (10 sec: 41776.9, 60 sec: 41778.9, 300 sec: 41654.2). Total num frames: 2025185280. Throughput: 0: 10464.7. Samples: 256296174. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:08,978][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:10,107][626795] Updated weights for policy 0, policy_version 247222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:12,145][626795] Updated weights for policy 0, policy_version 247232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:13,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2025398272. Throughput: 0: 10449.5. Samples: 256326846. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:13,977][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:14,017][626795] Updated weights for policy 0, policy_version 247242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:16,004][626795] Updated weights for policy 0, policy_version 247252 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:17,912][626795] Updated weights for policy 0, policy_version 247262 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:18,975][24592] Fps is (10 sec: 42600.6, 60 sec: 41916.1, 300 sec: 41682.0). Total num frames: 2025611264. Throughput: 0: 10456.4. Samples: 256389984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:18,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:20,030][626795] Updated weights for policy 0, policy_version 247272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:21,892][626795] Updated weights for policy 0, policy_version 247282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:23,844][626795] Updated weights for policy 0, policy_version 247292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:23,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.1, 300 sec: 41654.2). Total num frames: 2025816064. Throughput: 0: 10448.9. Samples: 256452510. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:23,978][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:25,847][626795] Updated weights for policy 0, policy_version 247302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:27,705][626795] Updated weights for policy 0, policy_version 247312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:28,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2026029056. Throughput: 0: 10454.7. Samples: 256483752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:28,992][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:29,709][626795] Updated weights for policy 0, policy_version 247322 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:31,715][626795] Updated weights for policy 0, policy_version 247332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:33,638][626795] Updated weights for policy 0, policy_version 247342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.2, 300 sec: 41682.1). Total num frames: 2026233856. Throughput: 0: 10449.5. Samples: 256546422. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:33,976][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:35,748][626795] Updated weights for policy 0, policy_version 247352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:37,695][626795] Updated weights for policy 0, policy_version 247362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2026438656. Throughput: 0: 10425.3. Samples: 256608408. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:38,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:39,616][626795] Updated weights for policy 0, policy_version 247372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:41,536][626795] Updated weights for policy 0, policy_version 247382 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:43,594][626795] Updated weights for policy 0, policy_version 247392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:43,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41778.8, 300 sec: 41681.9). Total num frames: 2026651648. Throughput: 0: 10429.2. Samples: 256639692. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:43,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:45,600][626795] Updated weights for policy 0, policy_version 247402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:47,550][626795] Updated weights for policy 0, policy_version 247412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.3, 300 sec: 41654.2). Total num frames: 2026856448. Throughput: 0: 10399.8. Samples: 256701234. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:48,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:49,551][626795] Updated weights for policy 0, policy_version 247422 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:51,476][626795] Updated weights for policy 0, policy_version 247432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:53,430][626795] Updated weights for policy 0, policy_version 247442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:53,976][24592] Fps is (10 sec: 40961.3, 60 sec: 41642.5, 300 sec: 41654.2). Total num frames: 2027061248. Throughput: 0: 10400.3. Samples: 256764186. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:53,977][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:55,449][626795] Updated weights for policy 0, policy_version 247452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:57,408][626795] Updated weights for policy 0, policy_version 247462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:58,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41642.7, 300 sec: 41626.5). Total num frames: 2027266048. Throughput: 0: 10392.9. Samples: 256794528. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 14:59:58,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 14:59:59,357][626795] Updated weights for policy 0, policy_version 247472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:01,284][626795] Updated weights for policy 0, policy_version 247482 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:03,190][626795] Updated weights for policy 0, policy_version 247492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:03,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41642.8, 300 sec: 41654.5). Total num frames: 2027479040. Throughput: 0: 10401.9. Samples: 256858068. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:03,977][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:05,260][626795] Updated weights for policy 0, policy_version 247502 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:07,247][626795] Updated weights for policy 0, policy_version 247512 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:08,976][24592] Fps is (10 sec: 42596.3, 60 sec: 41779.2, 300 sec: 41654.2). Total num frames: 2027692032. Throughput: 0: 10390.7. Samples: 256920096. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:08,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:09,261][626795] Updated weights for policy 0, policy_version 247522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:11,161][626795] Updated weights for policy 0, policy_version 247532 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:13,155][626795] Updated weights for policy 0, policy_version 247542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.7, 300 sec: 41626.5). Total num frames: 2027896832. Throughput: 0: 10388.0. Samples: 256951212. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:13,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:15,049][626795] Updated weights for policy 0, policy_version 247552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:17,116][626795] Updated weights for policy 0, policy_version 247562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:18,941][626795] Updated weights for policy 0, policy_version 247572 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:18,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41642.6, 300 sec: 41626.5). Total num frames: 2028109824. Throughput: 0: 10379.5. Samples: 257013498. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:18,978][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:21,002][626795] Updated weights for policy 0, policy_version 247582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:22,887][626795] Updated weights for policy 0, policy_version 247592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:23,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41506.2, 300 sec: 41598.7). Total num frames: 2028306432. Throughput: 0: 10385.6. Samples: 257075760. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:23,978][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:24,974][626795] Updated weights for policy 0, policy_version 247602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:26,859][626795] Updated weights for policy 0, policy_version 247612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:28,893][626795] Updated weights for policy 0, policy_version 247622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:28,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41506.1, 300 sec: 41626.5). Total num frames: 2028519424. Throughput: 0: 10383.0. Samples: 257106924. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:28,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:30,881][626795] Updated weights for policy 0, policy_version 247632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:32,788][626795] Updated weights for policy 0, policy_version 247642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41598.7). Total num frames: 2028724224. Throughput: 0: 10416.0. Samples: 257169954. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:33,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:34,862][626795] Updated weights for policy 0, policy_version 247652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:36,785][626795] Updated weights for policy 0, policy_version 247662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:38,710][626795] Updated weights for policy 0, policy_version 247672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:38,978][24592] Fps is (10 sec: 41769.8, 60 sec: 41641.0, 300 sec: 41626.1). Total num frames: 2028937216. Throughput: 0: 10397.8. Samples: 257232108. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:38,980][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:40,722][626795] Updated weights for policy 0, policy_version 247682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:42,675][626795] Updated weights for policy 0, policy_version 247692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:43,977][24592] Fps is (10 sec: 41772.1, 60 sec: 41505.3, 300 sec: 41598.5). Total num frames: 2029142016. Throughput: 0: 10408.0. Samples: 257262906. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:43,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:44,725][626795] Updated weights for policy 0, policy_version 247702 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:46,687][626795] Updated weights for policy 0, policy_version 247712 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:48,671][626795] Updated weights for policy 0, policy_version 247722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:48,977][24592] Fps is (10 sec: 40964.7, 60 sec: 41505.3, 300 sec: 41598.5). Total num frames: 2029346816. Throughput: 0: 10383.3. Samples: 257325330. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:48,977][24592] Avg episode reward: [(0, '4.282')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:50,751][626795] Updated weights for policy 0, policy_version 247732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:52,721][626795] Updated weights for policy 0, policy_version 247742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:53,975][24592] Fps is (10 sec: 40967.3, 60 sec: 41506.3, 300 sec: 41570.9). Total num frames: 2029551616. Throughput: 0: 10349.7. Samples: 257385828. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:53,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:54,719][626795] Updated weights for policy 0, policy_version 247752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:56,735][626795] Updated weights for policy 0, policy_version 247762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:00:58,727][626795] Updated weights for policy 0, policy_version 247772 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:58,983][24592] Fps is (10 sec: 40933.9, 60 sec: 41500.9, 300 sec: 41569.9). Total num frames: 2029756416. Throughput: 0: 10345.7. Samples: 257416848. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:00:58,984][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:00,638][626795] Updated weights for policy 0, policy_version 247782 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:02,652][626795] Updated weights for policy 0, policy_version 247792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:03,976][24592] Fps is (10 sec: 40957.8, 60 sec: 41369.2, 300 sec: 41654.2). Total num frames: 2029961216. Throughput: 0: 10346.4. Samples: 257479092. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:03,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:04,013][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000247799_2029969408.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:04,086][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000246579_2019975168.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:04,672][626795] Updated weights for policy 0, policy_version 247802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:06,770][626795] Updated weights for policy 0, policy_version 247812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:08,662][626795] Updated weights for policy 0, policy_version 247822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:08,975][24592] Fps is (10 sec: 40991.0, 60 sec: 41233.4, 300 sec: 41682.0). Total num frames: 2030166016. Throughput: 0: 10326.4. Samples: 257540448. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:08,978][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:10,603][626795] Updated weights for policy 0, policy_version 247832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:12,596][626795] Updated weights for policy 0, policy_version 247842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:13,975][24592] Fps is (10 sec: 40962.2, 60 sec: 41233.1, 300 sec: 41654.2). Total num frames: 2030370816. Throughput: 0: 10319.1. Samples: 257571282. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:13,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:14,647][626795] Updated weights for policy 0, policy_version 247852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:16,462][626795] Updated weights for policy 0, policy_version 247862 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:18,461][626795] Updated weights for policy 0, policy_version 247872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41233.1, 300 sec: 41654.3). Total num frames: 2030583808. Throughput: 0: 10317.2. Samples: 257634228. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:18,977][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:20,490][626795] Updated weights for policy 0, policy_version 247882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:22,507][626795] Updated weights for policy 0, policy_version 247892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.6, 300 sec: 41626.6). Total num frames: 2030788608. Throughput: 0: 10308.4. Samples: 257695962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:23,976][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:24,469][626795] Updated weights for policy 0, policy_version 247902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:26,357][626795] Updated weights for policy 0, policy_version 247912 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:28,273][626795] Updated weights for policy 0, policy_version 247922 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41369.7, 300 sec: 41654.2). Total num frames: 2031001600. Throughput: 0: 10306.0. Samples: 257726658. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:28,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:30,372][626795] Updated weights for policy 0, policy_version 247932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:32,346][626795] Updated weights for policy 0, policy_version 247942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:33,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41369.5, 300 sec: 41626.4). Total num frames: 2031206400. Throughput: 0: 10318.5. Samples: 257789652. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:33,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:34,287][626795] Updated weights for policy 0, policy_version 247952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:36,309][626795] Updated weights for policy 0, policy_version 247962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:38,317][626795] Updated weights for policy 0, policy_version 247972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:38,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41234.6, 300 sec: 41626.5). Total num frames: 2031411200. Throughput: 0: 10342.0. Samples: 257851218. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:38,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:40,205][626795] Updated weights for policy 0, policy_version 247982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:42,163][626795] Updated weights for policy 0, policy_version 247992 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:43,976][24592] Fps is (10 sec: 40957.3, 60 sec: 41233.6, 300 sec: 41598.6). Total num frames: 2031616000. Throughput: 0: 10356.5. Samples: 257882820. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:43,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:44,157][626795] Updated weights for policy 0, policy_version 248002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:46,107][626795] Updated weights for policy 0, policy_version 248012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:48,068][626795] Updated weights for policy 0, policy_version 248022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41370.4, 300 sec: 41598.7). Total num frames: 2031828992. Throughput: 0: 10370.5. Samples: 257945760. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:48,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:49,987][626795] Updated weights for policy 0, policy_version 248032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:51,952][626795] Updated weights for policy 0, policy_version 248042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:53,904][626795] Updated weights for policy 0, policy_version 248052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:53,976][24592] Fps is (10 sec: 42600.6, 60 sec: 41505.8, 300 sec: 41626.4). Total num frames: 2032041984. Throughput: 0: 10413.9. Samples: 258009078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:53,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:55,868][626795] Updated weights for policy 0, policy_version 248062 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:57,775][626795] Updated weights for policy 0, policy_version 248072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:58,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41511.3, 300 sec: 41598.7). Total num frames: 2032246784. Throughput: 0: 10413.9. Samples: 258039906. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:01:58,976][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:01:59,837][626795] Updated weights for policy 0, policy_version 248082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:01,712][626795] Updated weights for policy 0, policy_version 248092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:03,679][626795] Updated weights for policy 0, policy_version 248102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:03,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41642.6, 300 sec: 41654.2). Total num frames: 2032459776. Throughput: 0: 10404.5. Samples: 258102438. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:03,982][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:05,610][626795] Updated weights for policy 0, policy_version 248112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:07,589][626795] Updated weights for policy 0, policy_version 248122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2032672768. Throughput: 0: 10434.8. Samples: 258165528. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:08,978][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:09,570][626795] Updated weights for policy 0, policy_version 248132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:11,647][626795] Updated weights for policy 0, policy_version 248142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:13,542][626795] Updated weights for policy 0, policy_version 248152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:13,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41778.8, 300 sec: 41654.2). Total num frames: 2032877568. Throughput: 0: 10433.1. Samples: 258196152. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:13,978][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:15,447][626795] Updated weights for policy 0, policy_version 248162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:17,379][626795] Updated weights for policy 0, policy_version 248172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.2, 300 sec: 41654.2). Total num frames: 2033090560. Throughput: 0: 10448.5. Samples: 258259830. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:18,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:19,445][626795] Updated weights for policy 0, policy_version 248182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:21,289][626795] Updated weights for policy 0, policy_version 248192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:23,317][626795] Updated weights for policy 0, policy_version 248202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:23,976][24592] Fps is (10 sec: 41780.7, 60 sec: 41779.1, 300 sec: 41654.2). Total num frames: 2033295360. Throughput: 0: 10457.8. Samples: 258321822. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:23,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:25,295][626795] Updated weights for policy 0, policy_version 248212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:27,277][626795] Updated weights for policy 0, policy_version 248222 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:28,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.1, 300 sec: 41654.3). Total num frames: 2033508352. Throughput: 0: 10445.9. Samples: 258352878. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:28,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:29,243][626795] Updated weights for policy 0, policy_version 248232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:31,250][626795] Updated weights for policy 0, policy_version 248242 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:33,150][626795] Updated weights for policy 0, policy_version 248252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.4, 300 sec: 41654.3). Total num frames: 2033713152. Throughput: 0: 10432.0. Samples: 258415200. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:33,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:35,164][626795] Updated weights for policy 0, policy_version 248262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:37,122][626795] Updated weights for policy 0, policy_version 248272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:38,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41779.2, 300 sec: 41626.5). Total num frames: 2033917952. Throughput: 0: 10418.4. Samples: 258477900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:38,977][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:39,068][626795] Updated weights for policy 0, policy_version 248282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:41,199][626795] Updated weights for policy 0, policy_version 248292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:43,062][626795] Updated weights for policy 0, policy_version 248302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:43,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41779.6, 300 sec: 41626.4). Total num frames: 2034122752. Throughput: 0: 10414.5. Samples: 258508560. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:43,976][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:45,051][626795] Updated weights for policy 0, policy_version 248312 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:46,896][626795] Updated weights for policy 0, policy_version 248322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,168][626772] Signal inference workers to stop experience collection... (3450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,169][626772] Signal inference workers to resume experience collection... (3450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,197][626795] InferenceWorker_p0-w0: stopping experience collection (3450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,198][626795] InferenceWorker_p0-w0: resuming experience collection (3450 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:48,911][626795] Updated weights for policy 0, policy_version 248332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.2, 300 sec: 41627.2). Total num frames: 2034335744. Throughput: 0: 10423.9. Samples: 258571506. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:48,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:50,889][626795] Updated weights for policy 0, policy_version 248342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:52,810][626795] Updated weights for policy 0, policy_version 248352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:53,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41642.4, 300 sec: 41626.3). Total num frames: 2034540544. Throughput: 0: 10422.2. Samples: 258634536. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:53,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:54,862][626795] Updated weights for policy 0, policy_version 248362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:56,753][626795] Updated weights for policy 0, policy_version 248372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:02:58,666][626795] Updated weights for policy 0, policy_version 248382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:58,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41626.5). Total num frames: 2034753536. Throughput: 0: 10423.3. Samples: 258665196. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:02:58,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:00,733][626795] Updated weights for policy 0, policy_version 248392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:02,674][626795] Updated weights for policy 0, policy_version 248402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:03,975][24592] Fps is (10 sec: 41782.4, 60 sec: 41643.0, 300 sec: 41626.5). Total num frames: 2034958336. Throughput: 0: 10391.4. Samples: 258727446. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:03,976][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000248408_2034958336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:04,054][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000247191_2024988672.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:04,796][626795] Updated weights for policy 0, policy_version 248412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:06,618][626795] Updated weights for policy 0, policy_version 248422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:08,658][626795] Updated weights for policy 0, policy_version 248432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.1, 300 sec: 41598.7). Total num frames: 2035163136. Throughput: 0: 10393.4. Samples: 258789522. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:08,979][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:10,720][626795] Updated weights for policy 0, policy_version 248442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:12,607][626795] Updated weights for policy 0, policy_version 248452 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:13,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41643.0, 300 sec: 41626.6). Total num frames: 2035376128. Throughput: 0: 10389.2. Samples: 258820392. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:13,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:14,543][626795] Updated weights for policy 0, policy_version 248462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:16,526][626795] Updated weights for policy 0, policy_version 248472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:18,410][626795] Updated weights for policy 0, policy_version 248482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:18,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41505.9, 300 sec: 41598.6). Total num frames: 2035580928. Throughput: 0: 10415.4. Samples: 258883896. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:18,977][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:20,374][626795] Updated weights for policy 0, policy_version 248492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:22,247][626795] Updated weights for policy 0, policy_version 248502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.8, 300 sec: 41626.5). Total num frames: 2035793920. Throughput: 0: 10419.1. Samples: 258946758. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:23,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:24,318][626795] Updated weights for policy 0, policy_version 248512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:26,201][626795] Updated weights for policy 0, policy_version 248522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:28,184][626795] Updated weights for policy 0, policy_version 248532 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:28,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41506.2, 300 sec: 41598.7). Total num frames: 2035998720. Throughput: 0: 10437.3. Samples: 258978234. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:28,976][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:30,133][626795] Updated weights for policy 0, policy_version 248542 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:32,206][626795] Updated weights for policy 0, policy_version 248552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:33,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.6, 300 sec: 41626.5). Total num frames: 2036211712. Throughput: 0: 10412.2. Samples: 259040058. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:33,978][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:34,083][626795] Updated weights for policy 0, policy_version 248562 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:36,117][626795] Updated weights for policy 0, policy_version 248572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:38,079][626795] Updated weights for policy 0, policy_version 248582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:38,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.7, 300 sec: 41598.7). Total num frames: 2036416512. Throughput: 0: 10406.2. Samples: 259102806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:38,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:39,996][626795] Updated weights for policy 0, policy_version 248592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:41,994][626795] Updated weights for policy 0, policy_version 248602 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:43,831][626795] Updated weights for policy 0, policy_version 248612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:43,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41779.4, 300 sec: 41626.5). Total num frames: 2036629504. Throughput: 0: 10422.7. Samples: 259134216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:43,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:45,926][626795] Updated weights for policy 0, policy_version 248622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:47,765][626795] Updated weights for policy 0, policy_version 248632 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:48,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41626.5). Total num frames: 2036842496. Throughput: 0: 10448.6. Samples: 259197630. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:48,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:49,703][626795] Updated weights for policy 0, policy_version 248642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:51,602][626795] Updated weights for policy 0, policy_version 248652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:53,690][626795] Updated weights for policy 0, policy_version 248662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:53,978][24592] Fps is (10 sec: 41767.9, 60 sec: 41777.9, 300 sec: 41626.1). Total num frames: 2037047296. Throughput: 0: 10473.8. Samples: 259260870. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:53,988][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:55,653][626795] Updated weights for policy 0, policy_version 248672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:57,578][626795] Updated weights for policy 0, policy_version 248682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.3, 300 sec: 41626.5). Total num frames: 2037260288. Throughput: 0: 10474.1. Samples: 259291728. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:03:58,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:03:59,442][626795] Updated weights for policy 0, policy_version 248692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:01,523][626795] Updated weights for policy 0, policy_version 248702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:03,495][626795] Updated weights for policy 0, policy_version 248712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:03,975][24592] Fps is (10 sec: 41791.1, 60 sec: 41779.3, 300 sec: 41626.6). Total num frames: 2037465088. Throughput: 0: 10460.8. Samples: 259354626. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:03,978][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:05,535][626795] Updated weights for policy 0, policy_version 248722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:07,436][626795] Updated weights for policy 0, policy_version 248732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.7, 300 sec: 41626.5). Total num frames: 2037678080. Throughput: 0: 10447.7. Samples: 259416906. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:08,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:09,460][626795] Updated weights for policy 0, policy_version 248742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:11,344][626795] Updated weights for policy 0, policy_version 248752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:13,313][626795] Updated weights for policy 0, policy_version 248762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:13,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41598.7). Total num frames: 2037882880. Throughput: 0: 10435.2. Samples: 259447818. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:13,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:15,209][626795] Updated weights for policy 0, policy_version 248772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:17,089][626795] Updated weights for policy 0, policy_version 248782 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41916.0, 300 sec: 41626.5). Total num frames: 2038095872. Throughput: 0: 10475.4. Samples: 259511448. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:18,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:19,150][626795] Updated weights for policy 0, policy_version 248792 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:21,053][626795] Updated weights for policy 0, policy_version 248802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:23,025][626795] Updated weights for policy 0, policy_version 248812 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41915.7, 300 sec: 41626.5). Total num frames: 2038308864. Throughput: 0: 10484.4. Samples: 259574604. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:23,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:25,002][626795] Updated weights for policy 0, policy_version 248822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:26,921][626795] Updated weights for policy 0, policy_version 248832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:28,854][626795] Updated weights for policy 0, policy_version 248842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:28,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.7, 300 sec: 41626.5). Total num frames: 2038513664. Throughput: 0: 10480.9. Samples: 259605858. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:28,976][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:30,753][626795] Updated weights for policy 0, policy_version 248852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:32,783][626795] Updated weights for policy 0, policy_version 248862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:33,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.8, 300 sec: 41654.2). Total num frames: 2038726656. Throughput: 0: 10479.9. Samples: 259669224. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:33,976][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:34,692][626795] Updated weights for policy 0, policy_version 248872 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:36,668][626795] Updated weights for policy 0, policy_version 248882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:38,475][626795] Updated weights for policy 0, policy_version 248892 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:38,978][24592] Fps is (10 sec: 41769.9, 60 sec: 41914.2, 300 sec: 41626.2). Total num frames: 2038931456. Throughput: 0: 10464.3. Samples: 259731756. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:38,979][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:40,573][626795] Updated weights for policy 0, policy_version 248902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:42,526][626795] Updated weights for policy 0, policy_version 248912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:43,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.8, 300 sec: 41654.2). Total num frames: 2039144448. Throughput: 0: 10473.6. Samples: 259763040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:43,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:44,578][626795] Updated weights for policy 0, policy_version 248922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:46,385][626795] Updated weights for policy 0, policy_version 248932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:48,338][626795] Updated weights for policy 0, policy_version 248942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:48,975][24592] Fps is (10 sec: 42608.0, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2039357440. Throughput: 0: 10484.1. Samples: 259826412. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:48,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:50,313][626795] Updated weights for policy 0, policy_version 248952 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:52,325][626795] Updated weights for policy 0, policy_version 248962 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41917.7, 300 sec: 41682.0). Total num frames: 2039562240. Throughput: 0: 10503.9. Samples: 259889580. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:53,978][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:54,172][626795] Updated weights for policy 0, policy_version 248972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:56,203][626795] Updated weights for policy 0, policy_version 248982 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:04:58,190][626795] Updated weights for policy 0, policy_version 248992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2039775232. Throughput: 0: 10506.7. Samples: 259920618. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:04:58,978][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:00,088][626795] Updated weights for policy 0, policy_version 249002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:01,998][626795] Updated weights for policy 0, policy_version 249012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:03,861][626795] Updated weights for policy 0, policy_version 249022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:03,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42052.2, 300 sec: 41682.1). Total num frames: 2039988224. Throughput: 0: 10501.3. Samples: 259984008. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:03,979][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000249022_2039988224.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:04,075][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000247799_2029969408.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:05,912][626795] Updated weights for policy 0, policy_version 249032 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:07,894][626795] Updated weights for policy 0, policy_version 249042 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:08,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41915.5, 300 sec: 41681.9). Total num frames: 2040193024. Throughput: 0: 10486.3. Samples: 260046492. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:08,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:09,868][626795] Updated weights for policy 0, policy_version 249052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:11,867][626795] Updated weights for policy 0, policy_version 249062 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:13,809][626795] Updated weights for policy 0, policy_version 249072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:13,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42052.1, 300 sec: 41682.0). Total num frames: 2040406016. Throughput: 0: 10481.5. Samples: 260077530. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:13,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:15,832][626795] Updated weights for policy 0, policy_version 249082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:17,609][626795] Updated weights for policy 0, policy_version 249092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:18,976][24592] Fps is (10 sec: 41780.4, 60 sec: 41915.6, 300 sec: 41709.8). Total num frames: 2040610816. Throughput: 0: 10467.3. Samples: 260140254. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:18,977][24592] Avg episode reward: [(0, '4.934')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:19,659][626795] Updated weights for policy 0, policy_version 249102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:21,674][626795] Updated weights for policy 0, policy_version 249112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:23,640][626795] Updated weights for policy 0, policy_version 249122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:23,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2040823808. Throughput: 0: 10485.2. Samples: 260203566. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:23,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:25,451][626795] Updated weights for policy 0, policy_version 249132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:27,438][626795] Updated weights for policy 0, policy_version 249142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:28,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42052.3, 300 sec: 41737.6). Total num frames: 2041036800. Throughput: 0: 10478.5. Samples: 260234574. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:28,978][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:29,397][626795] Updated weights for policy 0, policy_version 249152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:31,334][626795] Updated weights for policy 0, policy_version 249162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:33,325][626795] Updated weights for policy 0, policy_version 249172 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.8, 300 sec: 41710.1). Total num frames: 2041241600. Throughput: 0: 10478.4. Samples: 260297940. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:33,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:35,262][626795] Updated weights for policy 0, policy_version 249182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:37,278][626795] Updated weights for policy 0, policy_version 249192 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:38,976][24592] Fps is (10 sec: 41777.5, 60 sec: 42053.5, 300 sec: 41737.7). Total num frames: 2041454592. Throughput: 0: 10472.8. Samples: 260360862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:38,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:39,188][626795] Updated weights for policy 0, policy_version 249202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:41,222][626795] Updated weights for policy 0, policy_version 249212 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:43,124][626795] Updated weights for policy 0, policy_version 249222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:43,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.7, 300 sec: 41737.7). Total num frames: 2041659392. Throughput: 0: 10465.6. Samples: 260391570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:43,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:45,124][626795] Updated weights for policy 0, policy_version 249232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:46,999][626795] Updated weights for policy 0, policy_version 249242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:48,975][24592] Fps is (10 sec: 40961.7, 60 sec: 41779.2, 300 sec: 41737.5). Total num frames: 2041864192. Throughput: 0: 10454.4. Samples: 260454456. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:48,976][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:49,086][626795] Updated weights for policy 0, policy_version 249252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:51,122][626795] Updated weights for policy 0, policy_version 249262 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:53,064][626795] Updated weights for policy 0, policy_version 249272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:53,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41779.2, 300 sec: 41738.6). Total num frames: 2042068992. Throughput: 0: 10443.7. Samples: 260516454. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:53,979][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:54,952][626795] Updated weights for policy 0, policy_version 249282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:56,960][626795] Updated weights for policy 0, policy_version 249292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:05:58,888][626795] Updated weights for policy 0, policy_version 249302 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:58,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41779.1, 300 sec: 41765.4). Total num frames: 2042281984. Throughput: 0: 10444.8. Samples: 260547546. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:05:58,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:00,886][626795] Updated weights for policy 0, policy_version 249312 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:02,807][626795] Updated weights for policy 0, policy_version 249322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.7, 300 sec: 41765.3). Total num frames: 2042486784. Throughput: 0: 10458.6. Samples: 260610888. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:03,976][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:04,816][626795] Updated weights for policy 0, policy_version 249332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:06,720][626795] Updated weights for policy 0, policy_version 249342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:08,541][626795] Updated weights for policy 0, policy_version 249352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.5, 300 sec: 41793.1). Total num frames: 2042699776. Throughput: 0: 10453.3. Samples: 260673966. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:08,976][24592] Avg episode reward: [(0, '4.927')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:10,660][626795] Updated weights for policy 0, policy_version 249362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:12,582][626795] Updated weights for policy 0, policy_version 249372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:13,976][24592] Fps is (10 sec: 42596.2, 60 sec: 41779.0, 300 sec: 41793.0). Total num frames: 2042912768. Throughput: 0: 10440.4. Samples: 260704398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:13,977][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:14,573][626795] Updated weights for policy 0, policy_version 249382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:16,605][626795] Updated weights for policy 0, policy_version 249392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:18,520][626795] Updated weights for policy 0, policy_version 249402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:18,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2043117568. Throughput: 0: 10432.1. Samples: 260767386. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:18,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:20,440][626795] Updated weights for policy 0, policy_version 249412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:22,434][626795] Updated weights for policy 0, policy_version 249422 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:23,976][24592] Fps is (10 sec: 41780.1, 60 sec: 41779.0, 300 sec: 41793.0). Total num frames: 2043330560. Throughput: 0: 10419.9. Samples: 260829756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:23,977][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:24,438][626795] Updated weights for policy 0, policy_version 249432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:26,244][626795] Updated weights for policy 0, policy_version 249442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:28,234][626795] Updated weights for policy 0, policy_version 249452 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:28,976][24592] Fps is (10 sec: 41776.1, 60 sec: 41642.0, 300 sec: 41793.0). Total num frames: 2043535360. Throughput: 0: 10435.4. Samples: 260861172. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:28,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:30,336][626795] Updated weights for policy 0, policy_version 249462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:32,190][626795] Updated weights for policy 0, policy_version 249472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:33,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2043748352. Throughput: 0: 10434.5. Samples: 260924010. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:33,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:34,047][626795] Updated weights for policy 0, policy_version 249482 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:36,128][626795] Updated weights for policy 0, policy_version 249492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:38,018][626795] Updated weights for policy 0, policy_version 249502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:38,975][24592] Fps is (10 sec: 41783.3, 60 sec: 41643.0, 300 sec: 41821.0). Total num frames: 2043953152. Throughput: 0: 10470.0. Samples: 260987604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:38,978][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:39,967][626795] Updated weights for policy 0, policy_version 249512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:41,885][626795] Updated weights for policy 0, policy_version 249522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:43,863][626795] Updated weights for policy 0, policy_version 249532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:43,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41820.8). Total num frames: 2044166144. Throughput: 0: 10480.1. Samples: 261019152. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:43,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:45,772][626795] Updated weights for policy 0, policy_version 249542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:47,810][626795] Updated weights for policy 0, policy_version 249552 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:48,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41915.6, 300 sec: 41820.9). Total num frames: 2044379136. Throughput: 0: 10461.8. Samples: 261081672. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:48,976][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:49,817][626795] Updated weights for policy 0, policy_version 249562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:51,760][626795] Updated weights for policy 0, policy_version 249572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:53,640][626795] Updated weights for policy 0, policy_version 249582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:53,977][24592] Fps is (10 sec: 41773.8, 60 sec: 41914.8, 300 sec: 41820.7). Total num frames: 2044583936. Throughput: 0: 10445.2. Samples: 261144012. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:53,978][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:55,778][626795] Updated weights for policy 0, policy_version 249592 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:57,722][626795] Updated weights for policy 0, policy_version 249602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:58,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41779.2, 300 sec: 41793.2). Total num frames: 2044788736. Throughput: 0: 10450.3. Samples: 261174654. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:06:58,978][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:06:59,530][626795] Updated weights for policy 0, policy_version 249612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:01,589][626795] Updated weights for policy 0, policy_version 249622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:03,542][626795] Updated weights for policy 0, policy_version 249632 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:03,976][24592] Fps is (10 sec: 41782.4, 60 sec: 41915.4, 300 sec: 41793.0). Total num frames: 2045001728. Throughput: 0: 10464.3. Samples: 261238284. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:03,978][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000249634_2045001728.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:04,087][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000248408_2034958336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:05,474][626795] Updated weights for policy 0, policy_version 249642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:07,528][626795] Updated weights for policy 0, policy_version 249652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41915.7, 300 sec: 41820.9). Total num frames: 2045214720. Throughput: 0: 10467.7. Samples: 261300798. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:08,978][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:09,349][626795] Updated weights for policy 0, policy_version 249662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:11,334][626795] Updated weights for policy 0, policy_version 249672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:13,314][626795] Updated weights for policy 0, policy_version 249682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:13,976][24592] Fps is (10 sec: 41780.2, 60 sec: 41779.3, 300 sec: 41793.0). Total num frames: 2045419520. Throughput: 0: 10462.3. Samples: 261331968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:13,980][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:15,194][626795] Updated weights for policy 0, policy_version 249692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:17,169][626795] Updated weights for policy 0, policy_version 249702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.8, 300 sec: 41820.9). Total num frames: 2045632512. Throughput: 0: 10467.3. Samples: 261395040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:18,978][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:19,287][626795] Updated weights for policy 0, policy_version 249712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:21,142][626795] Updated weights for policy 0, policy_version 249722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:23,103][626795] Updated weights for policy 0, policy_version 249732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:23,975][24592] Fps is (10 sec: 41780.5, 60 sec: 41779.4, 300 sec: 41793.1). Total num frames: 2045837312. Throughput: 0: 10443.4. Samples: 261457560. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:23,976][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:25,120][626795] Updated weights for policy 0, policy_version 249742 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:27,045][626795] Updated weights for policy 0, policy_version 249752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:28,954][626795] Updated weights for policy 0, policy_version 249762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41916.4, 300 sec: 41820.9). Total num frames: 2046050304. Throughput: 0: 10431.1. Samples: 261488550. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:28,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:30,948][626795] Updated weights for policy 0, policy_version 249772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:32,873][626795] Updated weights for policy 0, policy_version 249782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:33,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2046263296. Throughput: 0: 10453.2. Samples: 261552066. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:33,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:34,835][626795] Updated weights for policy 0, policy_version 249792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:36,820][626795] Updated weights for policy 0, policy_version 249802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:38,742][626795] Updated weights for policy 0, policy_version 249812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:38,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41915.5, 300 sec: 41848.6). Total num frames: 2046468096. Throughput: 0: 10474.0. Samples: 261615330. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:38,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:40,709][626795] Updated weights for policy 0, policy_version 249822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:42,674][626795] Updated weights for policy 0, policy_version 249832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:43,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41915.5, 300 sec: 41848.6). Total num frames: 2046681088. Throughput: 0: 10473.7. Samples: 261645972. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:43,977][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:44,582][626795] Updated weights for policy 0, policy_version 249842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:46,601][626795] Updated weights for policy 0, policy_version 249852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:48,450][626795] Updated weights for policy 0, policy_version 249862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:48,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41779.3, 300 sec: 41848.7). Total num frames: 2046885888. Throughput: 0: 10467.1. Samples: 261709296. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:48,978][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:50,449][626795] Updated weights for policy 0, policy_version 249872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:52,478][626795] Updated weights for policy 0, policy_version 249882 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:53,976][24592] Fps is (10 sec: 41779.1, 60 sec: 41916.4, 300 sec: 41848.6). Total num frames: 2047098880. Throughput: 0: 10462.3. Samples: 261771606. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:53,978][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:54,406][626795] Updated weights for policy 0, policy_version 249892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:56,377][626795] Updated weights for policy 0, policy_version 249902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:07:58,335][626795] Updated weights for policy 0, policy_version 249912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2047295488. Throughput: 0: 10466.7. Samples: 261802968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:07:58,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:00,354][626795] Updated weights for policy 0, policy_version 249922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:02,168][626795] Updated weights for policy 0, policy_version 249932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:03,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.8, 300 sec: 41876.3). Total num frames: 2047516672. Throughput: 0: 10461.9. Samples: 261865830. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:03,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:04,197][626795] Updated weights for policy 0, policy_version 249942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:06,140][626795] Updated weights for policy 0, policy_version 249952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:08,012][626795] Updated weights for policy 0, policy_version 249962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:08,976][24592] Fps is (10 sec: 42598.1, 60 sec: 41779.1, 300 sec: 41848.6). Total num frames: 2047721472. Throughput: 0: 10481.3. Samples: 261929220. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:08,978][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:10,102][626795] Updated weights for policy 0, policy_version 249972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:11,919][626795] Updated weights for policy 0, policy_version 249982 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:13,900][626795] Updated weights for policy 0, policy_version 249992 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:13,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41916.0, 300 sec: 41876.4). Total num frames: 2047934464. Throughput: 0: 10490.7. Samples: 261960630. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:13,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:15,867][626795] Updated weights for policy 0, policy_version 250002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:17,820][626795] Updated weights for policy 0, policy_version 250012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:18,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41915.8, 300 sec: 41876.4). Total num frames: 2048147456. Throughput: 0: 10470.9. Samples: 262023258. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:18,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:19,795][626795] Updated weights for policy 0, policy_version 250022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:21,686][626795] Updated weights for policy 0, policy_version 250032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:23,780][626795] Updated weights for policy 0, policy_version 250042 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.7, 300 sec: 41876.4). Total num frames: 2048352256. Throughput: 0: 10462.4. Samples: 262086138. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:23,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:25,735][626795] Updated weights for policy 0, policy_version 250052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:27,738][626795] Updated weights for policy 0, policy_version 250062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:28,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41779.1, 300 sec: 41848.6). Total num frames: 2048557056. Throughput: 0: 10470.7. Samples: 262117152. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:28,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:29,506][626795] Updated weights for policy 0, policy_version 250072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:31,511][626795] Updated weights for policy 0, policy_version 250082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:33,565][626795] Updated weights for policy 0, policy_version 250092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:33,981][24592] Fps is (10 sec: 41755.3, 60 sec: 41775.2, 300 sec: 41875.6). Total num frames: 2048770048. Throughput: 0: 10454.0. Samples: 262179786. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:33,982][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:35,538][626795] Updated weights for policy 0, policy_version 250102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:37,410][626795] Updated weights for policy 0, policy_version 250112 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.4, 300 sec: 41848.6). Total num frames: 2048974848. Throughput: 0: 10465.2. Samples: 262242534. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:38,977][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:39,370][626772] Signal inference workers to stop experience collection... (3500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:39,370][626772] Signal inference workers to resume experience collection... (3500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:39,380][626795] InferenceWorker_p0-w0: stopping experience collection (3500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:39,387][626795] InferenceWorker_p0-w0: resuming experience collection (3500 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:39,420][626795] Updated weights for policy 0, policy_version 250122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:41,424][626795] Updated weights for policy 0, policy_version 250132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:43,315][626795] Updated weights for policy 0, policy_version 250142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:43,975][24592] Fps is (10 sec: 41803.5, 60 sec: 41779.5, 300 sec: 41848.6). Total num frames: 2049187840. Throughput: 0: 10464.7. Samples: 262273878. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:43,977][24592] Avg episode reward: [(0, '4.982')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:45,231][626795] Updated weights for policy 0, policy_version 250152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:47,236][626795] Updated weights for policy 0, policy_version 250162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:48,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41778.9, 300 sec: 41848.9). Total num frames: 2049392640. Throughput: 0: 10457.2. Samples: 262336404. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:48,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:49,221][626795] Updated weights for policy 0, policy_version 250172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:51,167][626795] Updated weights for policy 0, policy_version 250182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:53,010][626795] Updated weights for policy 0, policy_version 250192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:53,977][24592] Fps is (10 sec: 41773.6, 60 sec: 41778.6, 300 sec: 41848.4). Total num frames: 2049605632. Throughput: 0: 10448.5. Samples: 262399416. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:53,979][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:55,085][626795] Updated weights for policy 0, policy_version 250202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:57,053][626795] Updated weights for policy 0, policy_version 250212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:58,976][24592] Fps is (10 sec: 41780.6, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2049810432. Throughput: 0: 10442.8. Samples: 262430556. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:08:58,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:08:59,059][626795] Updated weights for policy 0, policy_version 250222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:00,976][626795] Updated weights for policy 0, policy_version 250232 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:02,947][626795] Updated weights for policy 0, policy_version 250242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:03,976][24592] Fps is (10 sec: 41782.8, 60 sec: 41779.2, 300 sec: 41848.6). Total num frames: 2050023424. Throughput: 0: 10450.2. Samples: 262493520. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:03,981][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000250247_2050023424.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:04,083][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000249022_2039988224.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:04,968][626795] Updated weights for policy 0, policy_version 250252 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:06,841][626795] Updated weights for policy 0, policy_version 250262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:08,764][626795] Updated weights for policy 0, policy_version 250272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:08,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41779.0, 300 sec: 41848.6). Total num frames: 2050228224. Throughput: 0: 10453.3. Samples: 262556538. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:08,977][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:10,868][626795] Updated weights for policy 0, policy_version 250282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:12,860][626795] Updated weights for policy 0, policy_version 250292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:13,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41779.2, 300 sec: 41848.6). Total num frames: 2050441216. Throughput: 0: 10429.8. Samples: 262586490. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:13,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:14,793][626795] Updated weights for policy 0, policy_version 250302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:16,810][626795] Updated weights for policy 0, policy_version 250312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:18,563][626795] Updated weights for policy 0, policy_version 250322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:18,976][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.4, 300 sec: 41820.8). Total num frames: 2050646016. Throughput: 0: 10426.7. Samples: 262648932. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:18,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:20,740][626795] Updated weights for policy 0, policy_version 250332 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:22,602][626795] Updated weights for policy 0, policy_version 250342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:23,977][24592] Fps is (10 sec: 40954.5, 60 sec: 41641.8, 300 sec: 41820.7). Total num frames: 2050850816. Throughput: 0: 10440.4. Samples: 262712364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:23,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:24,503][626795] Updated weights for policy 0, policy_version 250352 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:26,672][626795] Updated weights for policy 0, policy_version 250362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:28,654][626795] Updated weights for policy 0, policy_version 250372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:28,976][24592] Fps is (10 sec: 41780.2, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2051063808. Throughput: 0: 10408.5. Samples: 262742262. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:28,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:30,554][626795] Updated weights for policy 0, policy_version 250382 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:32,517][626795] Updated weights for policy 0, policy_version 250392 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:33,975][24592] Fps is (10 sec: 41784.5, 60 sec: 41646.6, 300 sec: 41821.2). Total num frames: 2051268608. Throughput: 0: 10414.0. Samples: 262805028. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:33,976][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:34,522][626795] Updated weights for policy 0, policy_version 250402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:36,418][626795] Updated weights for policy 0, policy_version 250412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:38,373][626795] Updated weights for policy 0, policy_version 250422 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:38,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2051473408. Throughput: 0: 10411.8. Samples: 262867932. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:38,979][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:40,316][626795] Updated weights for policy 0, policy_version 250432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:42,286][626795] Updated weights for policy 0, policy_version 250442 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:43,977][24592] Fps is (10 sec: 42590.2, 60 sec: 41777.8, 300 sec: 41820.6). Total num frames: 2051694592. Throughput: 0: 10420.8. Samples: 262899510. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:43,979][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:44,120][626795] Updated weights for policy 0, policy_version 250452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:46,199][626795] Updated weights for policy 0, policy_version 250462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:48,051][626795] Updated weights for policy 0, policy_version 250472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:48,976][24592] Fps is (10 sec: 42596.8, 60 sec: 41779.3, 300 sec: 41820.8). Total num frames: 2051899392. Throughput: 0: 10430.6. Samples: 262962894. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:48,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:50,076][626795] Updated weights for policy 0, policy_version 250482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:51,969][626795] Updated weights for policy 0, policy_version 250492 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:53,925][626795] Updated weights for policy 0, policy_version 250502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:53,975][24592] Fps is (10 sec: 41787.6, 60 sec: 41780.1, 300 sec: 41820.9). Total num frames: 2052112384. Throughput: 0: 10431.5. Samples: 263025954. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:53,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:55,942][626795] Updated weights for policy 0, policy_version 250512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:57,731][626795] Updated weights for policy 0, policy_version 250522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:58,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2052317184. Throughput: 0: 10469.7. Samples: 263057628. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:09:58,979][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:09:59,807][626795] Updated weights for policy 0, policy_version 250532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:01,812][626795] Updated weights for policy 0, policy_version 250542 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:03,576][626795] Updated weights for policy 0, policy_version 250552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.5, 300 sec: 41820.9). Total num frames: 2052530176. Throughput: 0: 10480.3. Samples: 263120544. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:03,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:05,539][626795] Updated weights for policy 0, policy_version 250562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:07,566][626795] Updated weights for policy 0, policy_version 250572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:08,976][24592] Fps is (10 sec: 42598.2, 60 sec: 41915.9, 300 sec: 41820.9). Total num frames: 2052743168. Throughput: 0: 10469.2. Samples: 263183466. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:08,977][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:09,580][626795] Updated weights for policy 0, policy_version 250582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:11,472][626795] Updated weights for policy 0, policy_version 250592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:13,431][626795] Updated weights for policy 0, policy_version 250602 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:13,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2052947968. Throughput: 0: 10498.3. Samples: 263214684. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:13,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:15,527][626795] Updated weights for policy 0, policy_version 250612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:17,419][626795] Updated weights for policy 0, policy_version 250622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:18,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41915.9, 300 sec: 41820.8). Total num frames: 2053160960. Throughput: 0: 10488.4. Samples: 263277006. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:18,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:19,436][626795] Updated weights for policy 0, policy_version 250632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:21,373][626795] Updated weights for policy 0, policy_version 250642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:23,361][626795] Updated weights for policy 0, policy_version 250652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41916.7, 300 sec: 41793.1). Total num frames: 2053365760. Throughput: 0: 10481.6. Samples: 263339604. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:23,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:25,258][626795] Updated weights for policy 0, policy_version 250662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:27,258][626795] Updated weights for policy 0, policy_version 250672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2053570560. Throughput: 0: 10476.6. Samples: 263370936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:28,977][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:29,077][626795] Updated weights for policy 0, policy_version 250682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:31,206][626795] Updated weights for policy 0, policy_version 250692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:33,189][626795] Updated weights for policy 0, policy_version 250702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41779.2, 300 sec: 41765.4). Total num frames: 2053775360. Throughput: 0: 10452.5. Samples: 263433252. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:33,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:35,105][626795] Updated weights for policy 0, policy_version 250712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:37,102][626795] Updated weights for policy 0, policy_version 250722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.7, 300 sec: 41793.1). Total num frames: 2053988352. Throughput: 0: 10431.8. Samples: 263495388. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:38,978][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:39,106][626795] Updated weights for policy 0, policy_version 250732 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:41,171][626795] Updated weights for policy 0, policy_version 250742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:42,981][626795] Updated weights for policy 0, policy_version 250752 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:43,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41644.0, 300 sec: 41793.1). Total num frames: 2054193152. Throughput: 0: 10409.1. Samples: 263526036. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:43,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:45,051][626795] Updated weights for policy 0, policy_version 250762 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:46,860][626795] Updated weights for policy 0, policy_version 250772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:48,927][626795] Updated weights for policy 0, policy_version 250782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:48,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41779.2, 300 sec: 41820.8). Total num frames: 2054406144. Throughput: 0: 10413.9. Samples: 263589174. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:48,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:50,992][626795] Updated weights for policy 0, policy_version 250792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:52,815][626795] Updated weights for policy 0, policy_version 250802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:53,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2054610944. Throughput: 0: 10395.2. Samples: 263651250. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:53,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:54,818][626795] Updated weights for policy 0, policy_version 250812 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:56,914][626795] Updated weights for policy 0, policy_version 250822 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:10:58,802][626795] Updated weights for policy 0, policy_version 250832 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:58,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2054815744. Throughput: 0: 10372.1. Samples: 263681430. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:10:58,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:00,708][626795] Updated weights for policy 0, policy_version 250842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:02,747][626795] Updated weights for policy 0, policy_version 250852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:03,975][24592] Fps is (10 sec: 40959.4, 60 sec: 41506.1, 300 sec: 41765.3). Total num frames: 2055020544. Throughput: 0: 10389.9. Samples: 263744550. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:03,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000250857_2055020544.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:04,055][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000249634_2045001728.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:04,967][626795] Updated weights for policy 0, policy_version 250862 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:07,056][626795] Updated weights for policy 0, policy_version 250872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:08,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41233.2, 300 sec: 41709.9). Total num frames: 2055217152. Throughput: 0: 10284.3. Samples: 263802396. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:08,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:09,123][626795] Updated weights for policy 0, policy_version 250882 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:11,162][626795] Updated weights for policy 0, policy_version 250892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:13,030][626795] Updated weights for policy 0, policy_version 250902 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:13,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41233.0, 300 sec: 41709.8). Total num frames: 2055421952. Throughput: 0: 10272.8. Samples: 263833212. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:13,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:14,988][626795] Updated weights for policy 0, policy_version 250912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:17,030][626795] Updated weights for policy 0, policy_version 250922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:18,945][626795] Updated weights for policy 0, policy_version 250932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41709.8). Total num frames: 2055634944. Throughput: 0: 10271.9. Samples: 263895486. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:18,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:20,960][626795] Updated weights for policy 0, policy_version 250942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:22,949][626795] Updated weights for policy 0, policy_version 250952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:23,976][24592] Fps is (10 sec: 41777.1, 60 sec: 41232.7, 300 sec: 41709.8). Total num frames: 2055839744. Throughput: 0: 10295.6. Samples: 263958696. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:23,977][24592] Avg episode reward: [(0, '4.951')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:24,818][626795] Updated weights for policy 0, policy_version 250962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:26,797][626795] Updated weights for policy 0, policy_version 250972 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:28,613][626795] Updated weights for policy 0, policy_version 250982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:28,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.6, 300 sec: 41709.8). Total num frames: 2056052736. Throughput: 0: 10318.9. Samples: 263990388. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:28,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:30,752][626795] Updated weights for policy 0, policy_version 250992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:32,710][626795] Updated weights for policy 0, policy_version 251002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:33,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41369.6, 300 sec: 41709.8). Total num frames: 2056257536. Throughput: 0: 10301.4. Samples: 264052734. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:33,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:34,664][626795] Updated weights for policy 0, policy_version 251012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:36,623][626795] Updated weights for policy 0, policy_version 251022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:38,678][626795] Updated weights for policy 0, policy_version 251032 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:38,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41709.8). Total num frames: 2056470528. Throughput: 0: 10306.2. Samples: 264115032. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:38,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:40,561][626795] Updated weights for policy 0, policy_version 251042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:42,584][626795] Updated weights for policy 0, policy_version 251052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:43,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41369.7, 300 sec: 41682.0). Total num frames: 2056675328. Throughput: 0: 10312.6. Samples: 264145494. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:43,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:44,609][626795] Updated weights for policy 0, policy_version 251062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:46,592][626795] Updated weights for policy 0, policy_version 251072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:48,478][626795] Updated weights for policy 0, policy_version 251082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:48,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41233.3, 300 sec: 41682.2). Total num frames: 2056880128. Throughput: 0: 10288.2. Samples: 264207516. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:48,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:50,449][626795] Updated weights for policy 0, policy_version 251092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:52,402][626795] Updated weights for policy 0, policy_version 251102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41709.8). Total num frames: 2057093120. Throughput: 0: 10402.4. Samples: 264270504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:53,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:54,376][626795] Updated weights for policy 0, policy_version 251112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:56,427][626795] Updated weights for policy 0, policy_version 251122 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:11:58,306][626795] Updated weights for policy 0, policy_version 251132 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41682.1). Total num frames: 2057297920. Throughput: 0: 10408.4. Samples: 264301590. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:11:58,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:00,206][626795] Updated weights for policy 0, policy_version 251142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:02,210][626795] Updated weights for policy 0, policy_version 251152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.2, 300 sec: 41682.0). Total num frames: 2057510912. Throughput: 0: 10434.0. Samples: 264365016. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:03,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:04,132][626795] Updated weights for policy 0, policy_version 251162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:06,087][626795] Updated weights for policy 0, policy_version 251172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:07,946][626795] Updated weights for policy 0, policy_version 251182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:08,976][24592] Fps is (10 sec: 42596.3, 60 sec: 41778.9, 300 sec: 41709.8). Total num frames: 2057723904. Throughput: 0: 10431.2. Samples: 264428100. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:08,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:10,070][626795] Updated weights for policy 0, policy_version 251192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:11,974][626795] Updated weights for policy 0, policy_version 251202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:13,965][626795] Updated weights for policy 0, policy_version 251212 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:13,977][24592] Fps is (10 sec: 41773.2, 60 sec: 41778.3, 300 sec: 41681.8). Total num frames: 2057928704. Throughput: 0: 10411.0. Samples: 264458898. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:13,978][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:15,905][626795] Updated weights for policy 0, policy_version 251222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:17,887][626795] Updated weights for policy 0, policy_version 251232 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:18,975][24592] Fps is (10 sec: 40961.9, 60 sec: 41642.7, 300 sec: 41682.0). Total num frames: 2058133504. Throughput: 0: 10426.6. Samples: 264521928. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:18,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:19,868][626795] Updated weights for policy 0, policy_version 251242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:21,738][626795] Updated weights for policy 0, policy_version 251252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:23,661][626795] Updated weights for policy 0, policy_version 251262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:23,976][24592] Fps is (10 sec: 41783.5, 60 sec: 41779.3, 300 sec: 41682.0). Total num frames: 2058346496. Throughput: 0: 10447.3. Samples: 264585162. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:23,977][24592] Avg episode reward: [(0, '4.964')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:25,661][626795] Updated weights for policy 0, policy_version 251272 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:27,602][626795] Updated weights for policy 0, policy_version 251282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.3, 300 sec: 41682.0). Total num frames: 2058559488. Throughput: 0: 10464.0. Samples: 264616374. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:28,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:29,607][626795] Updated weights for policy 0, policy_version 251292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:31,501][626795] Updated weights for policy 0, policy_version 251302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:33,405][626795] Updated weights for policy 0, policy_version 251312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:33,975][24592] Fps is (10 sec: 41780.5, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2058764288. Throughput: 0: 10490.1. Samples: 264679572. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:33,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:35,399][626795] Updated weights for policy 0, policy_version 251322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:37,357][626795] Updated weights for policy 0, policy_version 251332 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:38,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41682.1). Total num frames: 2058977280. Throughput: 0: 10489.0. Samples: 264742512. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:38,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:39,460][626795] Updated weights for policy 0, policy_version 251342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:41,270][626795] Updated weights for policy 0, policy_version 251352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:43,348][626795] Updated weights for policy 0, policy_version 251362 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:43,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2059182080. Throughput: 0: 10470.5. Samples: 264772764. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:43,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:45,229][626795] Updated weights for policy 0, policy_version 251372 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:47,255][626795] Updated weights for policy 0, policy_version 251382 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:48,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2059386880. Throughput: 0: 10446.5. Samples: 264835110. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:48,978][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:49,301][626795] Updated weights for policy 0, policy_version 251392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:51,267][626795] Updated weights for policy 0, policy_version 251402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:53,221][626795] Updated weights for policy 0, policy_version 251412 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:53,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41709.8). Total num frames: 2059599872. Throughput: 0: 10419.3. Samples: 264896964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:53,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:55,183][626795] Updated weights for policy 0, policy_version 251422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:57,111][626795] Updated weights for policy 0, policy_version 251432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:12:58,956][626795] Updated weights for policy 0, policy_version 251442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.7, 300 sec: 41682.1). Total num frames: 2059812864. Throughput: 0: 10444.1. Samples: 264928866. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:12:58,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:00,932][626795] Updated weights for policy 0, policy_version 251452 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:02,935][626795] Updated weights for policy 0, policy_version 251462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:03,976][24592] Fps is (10 sec: 41776.3, 60 sec: 41778.7, 300 sec: 41681.9). Total num frames: 2060017664. Throughput: 0: 10440.4. Samples: 264991752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:03,977][24592] Avg episode reward: [(0, '4.967')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000251467_2060017664.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:04,062][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000250247_2050023424.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:04,990][626795] Updated weights for policy 0, policy_version 251472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:06,870][626795] Updated weights for policy 0, policy_version 251482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:08,881][626795] Updated weights for policy 0, policy_version 251492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41643.0, 300 sec: 41654.2). Total num frames: 2060222464. Throughput: 0: 10426.5. Samples: 265054350. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:08,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:10,781][626795] Updated weights for policy 0, policy_version 251502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:12,828][626795] Updated weights for policy 0, policy_version 251512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:13,975][24592] Fps is (10 sec: 41782.0, 60 sec: 41780.1, 300 sec: 41654.2). Total num frames: 2060435456. Throughput: 0: 10424.4. Samples: 265085472. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:13,979][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:14,751][626795] Updated weights for policy 0, policy_version 251522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:16,724][626795] Updated weights for policy 0, policy_version 251532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:18,679][626795] Updated weights for policy 0, policy_version 251542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:18,976][24592] Fps is (10 sec: 41777.1, 60 sec: 41778.9, 300 sec: 41654.2). Total num frames: 2060640256. Throughput: 0: 10415.9. Samples: 265148292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:18,979][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:20,669][626795] Updated weights for policy 0, policy_version 251552 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:22,565][626795] Updated weights for policy 0, policy_version 251562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:23,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.4, 300 sec: 41682.0). Total num frames: 2060853248. Throughput: 0: 10407.6. Samples: 265210854. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:23,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:24,606][626795] Updated weights for policy 0, policy_version 251572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:26,525][626795] Updated weights for policy 0, policy_version 251582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:28,439][626795] Updated weights for policy 0, policy_version 251592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:28,976][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.4, 300 sec: 41655.0). Total num frames: 2061058048. Throughput: 0: 10430.6. Samples: 265242144. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:28,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:30,329][626795] Updated weights for policy 0, policy_version 251602 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:32,291][626795] Updated weights for policy 0, policy_version 251612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:33,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2061271040. Throughput: 0: 10446.5. Samples: 265305204. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:33,977][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:34,284][626795] Updated weights for policy 0, policy_version 251622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:36,285][626795] Updated weights for policy 0, policy_version 251632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:38,245][626795] Updated weights for policy 0, policy_version 251642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:38,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41642.7, 300 sec: 41654.2). Total num frames: 2061475840. Throughput: 0: 10466.9. Samples: 265367976. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:38,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:40,259][626795] Updated weights for policy 0, policy_version 251652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:42,143][626795] Updated weights for policy 0, policy_version 251662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:43,976][24592] Fps is (10 sec: 40959.8, 60 sec: 41642.5, 300 sec: 41654.3). Total num frames: 2061680640. Throughput: 0: 10448.5. Samples: 265399050. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:43,977][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:44,187][626795] Updated weights for policy 0, policy_version 251672 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:46,217][626795] Updated weights for policy 0, policy_version 251682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:48,094][626795] Updated weights for policy 0, policy_version 251692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:48,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.2, 300 sec: 41654.4). Total num frames: 2061893632. Throughput: 0: 10438.2. Samples: 265461462. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:48,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:50,182][626795] Updated weights for policy 0, policy_version 251702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:52,012][626795] Updated weights for policy 0, policy_version 251712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:53,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41642.7, 300 sec: 41654.3). Total num frames: 2062098432. Throughput: 0: 10424.7. Samples: 265523460. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:53,978][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:54,019][626795] Updated weights for policy 0, policy_version 251722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:56,015][626795] Updated weights for policy 0, policy_version 251732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:58,047][626795] Updated weights for policy 0, policy_version 251742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:58,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.7, 300 sec: 41654.3). Total num frames: 2062311424. Throughput: 0: 10418.1. Samples: 265554288. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:13:58,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:13:59,890][626795] Updated weights for policy 0, policy_version 251752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:01,772][626795] Updated weights for policy 0, policy_version 251762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:03,782][626795] Updated weights for policy 0, policy_version 251772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:03,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.7, 300 sec: 41682.0). Total num frames: 2062524416. Throughput: 0: 10437.0. Samples: 265617954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:03,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:05,693][626795] Updated weights for policy 0, policy_version 251782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:07,604][626795] Updated weights for policy 0, policy_version 251792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:08,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2062737408. Throughput: 0: 10465.7. Samples: 265681812. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:08,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:09,609][626795] Updated weights for policy 0, policy_version 251802 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:11,553][626795] Updated weights for policy 0, policy_version 251812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:13,491][626795] Updated weights for policy 0, policy_version 251822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41682.1). Total num frames: 2062942208. Throughput: 0: 10462.4. Samples: 265712946. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:13,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:15,453][626795] Updated weights for policy 0, policy_version 251832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:17,364][626795] Updated weights for policy 0, policy_version 251842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41916.0, 300 sec: 41710.0). Total num frames: 2063155200. Throughput: 0: 10456.3. Samples: 265775736. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:18,976][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:19,484][626795] Updated weights for policy 0, policy_version 251852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:21,359][626795] Updated weights for policy 0, policy_version 251862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:23,344][626795] Updated weights for policy 0, policy_version 251872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:23,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.1, 300 sec: 41682.0). Total num frames: 2063360000. Throughput: 0: 10446.7. Samples: 265838076. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:23,977][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:25,292][626795] Updated weights for policy 0, policy_version 251882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:27,317][626795] Updated weights for policy 0, policy_version 251892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:28,991][24592] Fps is (10 sec: 40896.0, 60 sec: 41768.6, 300 sec: 41679.8). Total num frames: 2063564800. Throughput: 0: 10443.6. Samples: 265869174. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:28,992][24592] Avg episode reward: [(0, '4.292')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:29,242][626795] Updated weights for policy 0, policy_version 251902 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:31,221][626795] Updated weights for policy 0, policy_version 251912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:33,063][626795] Updated weights for policy 0, policy_version 251922 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.3, 300 sec: 41709.8). Total num frames: 2063777792. Throughput: 0: 10464.6. Samples: 265932372. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:33,977][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:35,058][626795] Updated weights for policy 0, policy_version 251932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:37,025][626795] Updated weights for policy 0, policy_version 251942 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:38,903][626795] Updated weights for policy 0, policy_version 251952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:38,976][24592] Fps is (10 sec: 42663.7, 60 sec: 41915.5, 300 sec: 41682.2). Total num frames: 2063990784. Throughput: 0: 10488.6. Samples: 265995450. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:38,988][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:40,967][626795] Updated weights for policy 0, policy_version 251962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:42,977][626795] Updated weights for policy 0, policy_version 251972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:43,976][24592] Fps is (10 sec: 42595.2, 60 sec: 42051.8, 300 sec: 41709.7). Total num frames: 2064203776. Throughput: 0: 10498.8. Samples: 266026740. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:43,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:44,746][626795] Updated weights for policy 0, policy_version 251982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:46,797][626795] Updated weights for policy 0, policy_version 251992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:48,623][626795] Updated weights for policy 0, policy_version 252002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:48,975][24592] Fps is (10 sec: 40961.5, 60 sec: 41779.1, 300 sec: 41654.2). Total num frames: 2064400384. Throughput: 0: 10471.6. Samples: 266089176. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:48,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:50,784][626795] Updated weights for policy 0, policy_version 252012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:52,684][626795] Updated weights for policy 0, policy_version 252022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:53,975][24592] Fps is (10 sec: 40962.7, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2064613376. Throughput: 0: 10441.2. Samples: 266151666. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:53,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:54,745][626795] Updated weights for policy 0, policy_version 252032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:56,692][626795] Updated weights for policy 0, policy_version 252042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:14:58,558][626795] Updated weights for policy 0, policy_version 252052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2064826368. Throughput: 0: 10439.9. Samples: 266182740. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:14:58,978][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:00,671][626795] Updated weights for policy 0, policy_version 252062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:02,549][626795] Updated weights for policy 0, policy_version 252072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:03,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2065031168. Throughput: 0: 10432.1. Samples: 266245182. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:03,976][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000252079_2065031168.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:04,101][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000250857_2055020544.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:04,544][626795] Updated weights for policy 0, policy_version 252082 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:06,536][626795] Updated weights for policy 0, policy_version 252092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:08,396][626795] Updated weights for policy 0, policy_version 252102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:08,976][24592] Fps is (10 sec: 40958.5, 60 sec: 41642.4, 300 sec: 41654.2). Total num frames: 2065235968. Throughput: 0: 10445.9. Samples: 266308146. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:08,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:10,380][626795] Updated weights for policy 0, policy_version 252112 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:12,283][626795] Updated weights for policy 0, policy_version 252122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41654.2). Total num frames: 2065448960. Throughput: 0: 10444.8. Samples: 266339028. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:13,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:14,340][626795] Updated weights for policy 0, policy_version 252132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:16,272][626795] Updated weights for policy 0, policy_version 252142 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:18,132][626795] Updated weights for policy 0, policy_version 252152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:18,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41642.7, 300 sec: 41654.2). Total num frames: 2065653760. Throughput: 0: 10448.5. Samples: 266402556. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:18,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:20,214][626795] Updated weights for policy 0, policy_version 252162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:22,111][626795] Updated weights for policy 0, policy_version 252172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.3, 300 sec: 41682.0). Total num frames: 2065866752. Throughput: 0: 10430.2. Samples: 266464806. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:23,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:24,243][626795] Updated weights for policy 0, policy_version 252182 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:26,071][626795] Updated weights for policy 0, policy_version 252192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:28,088][626795] Updated weights for policy 0, policy_version 252202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:28,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41790.1, 300 sec: 41682.0). Total num frames: 2066071552. Throughput: 0: 10416.8. Samples: 266495490. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:28,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:30,040][626795] Updated weights for policy 0, policy_version 252212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:32,056][626795] Updated weights for policy 0, policy_version 252222 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:33,925][626795] Updated weights for policy 0, policy_version 252232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:33,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41778.8, 300 sec: 41681.9). Total num frames: 2066284544. Throughput: 0: 10416.8. Samples: 266557938. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:33,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:35,985][626795] Updated weights for policy 0, policy_version 252242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:37,935][626795] Updated weights for policy 0, policy_version 252252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:38,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41642.9, 300 sec: 41682.0). Total num frames: 2066489344. Throughput: 0: 10426.8. Samples: 266620872. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:38,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:39,924][626795] Updated weights for policy 0, policy_version 252262 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:41,737][626795] Updated weights for policy 0, policy_version 252272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:43,739][626795] Updated weights for policy 0, policy_version 252282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:43,976][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.8, 300 sec: 41682.0). Total num frames: 2066702336. Throughput: 0: 10438.9. Samples: 266652498. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:43,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:45,778][626795] Updated weights for policy 0, policy_version 252292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:47,566][626795] Updated weights for policy 0, policy_version 252302 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2066915328. Throughput: 0: 10438.9. Samples: 266714934. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:48,977][24592] Avg episode reward: [(0, '4.267')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:49,683][626795] Updated weights for policy 0, policy_version 252312 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:51,667][626795] Updated weights for policy 0, policy_version 252322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:53,606][626795] Updated weights for policy 0, policy_version 252332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:53,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41779.3, 300 sec: 41709.8). Total num frames: 2067120128. Throughput: 0: 10431.6. Samples: 266777562. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:53,976][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:55,654][626795] Updated weights for policy 0, policy_version 252342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:57,581][626795] Updated weights for policy 0, policy_version 252352 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:58,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2067324928. Throughput: 0: 10416.4. Samples: 266807766. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:15:58,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:15:59,513][626795] Updated weights for policy 0, policy_version 252362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:01,590][626795] Updated weights for policy 0, policy_version 252372 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,124][626772] Signal inference workers to stop experience collection... (3550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,125][626772] Signal inference workers to resume experience collection... (3550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,132][626795] InferenceWorker_p0-w0: stopping experience collection (3550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,136][626795] InferenceWorker_p0-w0: resuming experience collection (3550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:03,325][626795] Updated weights for policy 0, policy_version 252382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41642.5, 300 sec: 41737.5). Total num frames: 2067529728. Throughput: 0: 10403.8. Samples: 266870730. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:03,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:05,412][626795] Updated weights for policy 0, policy_version 252392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:07,296][626795] Updated weights for policy 0, policy_version 252402 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.5, 300 sec: 41765.3). Total num frames: 2067742720. Throughput: 0: 10414.6. Samples: 266933460. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:08,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:09,327][626795] Updated weights for policy 0, policy_version 252412 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:11,285][626795] Updated weights for policy 0, policy_version 252422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:13,250][626795] Updated weights for policy 0, policy_version 252432 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:13,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41642.6, 300 sec: 41737.5). Total num frames: 2067947520. Throughput: 0: 10423.7. Samples: 266964558. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:13,977][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:15,132][626795] Updated weights for policy 0, policy_version 252442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:17,133][626795] Updated weights for policy 0, policy_version 252452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:18,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41779.0, 300 sec: 41765.4). Total num frames: 2068160512. Throughput: 0: 10444.1. Samples: 267027918. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:18,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:19,123][626795] Updated weights for policy 0, policy_version 252462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:21,083][626795] Updated weights for policy 0, policy_version 252472 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:22,873][626795] Updated weights for policy 0, policy_version 252482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2068373504. Throughput: 0: 10448.5. Samples: 267091056. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:23,976][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:24,908][626795] Updated weights for policy 0, policy_version 252492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:26,903][626795] Updated weights for policy 0, policy_version 252502 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:28,779][626795] Updated weights for policy 0, policy_version 252512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:28,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2068578304. Throughput: 0: 10437.8. Samples: 267122196. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:28,976][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:30,878][626795] Updated weights for policy 0, policy_version 252522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:32,739][626795] Updated weights for policy 0, policy_version 252532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:33,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2068791296. Throughput: 0: 10437.2. Samples: 267184614. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:33,977][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:34,765][626795] Updated weights for policy 0, policy_version 252542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:36,721][626795] Updated weights for policy 0, policy_version 252552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:38,682][626795] Updated weights for policy 0, policy_version 252562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:38,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2068996096. Throughput: 0: 10456.8. Samples: 267248118. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:38,977][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:40,599][626795] Updated weights for policy 0, policy_version 252572 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:42,592][626795] Updated weights for policy 0, policy_version 252582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:43,975][24592] Fps is (10 sec: 41781.4, 60 sec: 41779.6, 300 sec: 41793.1). Total num frames: 2069209088. Throughput: 0: 10473.9. Samples: 267279090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:43,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:44,561][626795] Updated weights for policy 0, policy_version 252592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:46,393][626795] Updated weights for policy 0, policy_version 252602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:48,409][626795] Updated weights for policy 0, policy_version 252612 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:48,976][24592] Fps is (10 sec: 42597.9, 60 sec: 41779.1, 300 sec: 41793.1). Total num frames: 2069422080. Throughput: 0: 10479.6. Samples: 267342312. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:48,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:50,390][626795] Updated weights for policy 0, policy_version 252622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:52,403][626795] Updated weights for policy 0, policy_version 252632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2069626880. Throughput: 0: 10472.7. Samples: 267404730. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:53,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:54,453][626795] Updated weights for policy 0, policy_version 252642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:56,224][626795] Updated weights for policy 0, policy_version 252652 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:16:58,248][626795] Updated weights for policy 0, policy_version 252662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41779.1, 300 sec: 41765.3). Total num frames: 2069831680. Throughput: 0: 10450.0. Samples: 267434808. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:16:58,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:00,395][626795] Updated weights for policy 0, policy_version 252672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:02,221][626795] Updated weights for policy 0, policy_version 252682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41779.3, 300 sec: 41737.6). Total num frames: 2070036480. Throughput: 0: 10437.4. Samples: 267497598. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:03,977][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000252690_2070036480.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:04,112][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000251467_2060017664.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:04,287][626795] Updated weights for policy 0, policy_version 252692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:06,289][626795] Updated weights for policy 0, policy_version 252702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:08,116][626795] Updated weights for policy 0, policy_version 252712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.2, 300 sec: 41765.5). Total num frames: 2070249472. Throughput: 0: 10414.8. Samples: 267559722. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:08,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:10,100][626795] Updated weights for policy 0, policy_version 252722 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:12,088][626795] Updated weights for policy 0, policy_version 252732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.3, 300 sec: 41765.3). Total num frames: 2070454272. Throughput: 0: 10414.5. Samples: 267590850. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:13,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:14,021][626795] Updated weights for policy 0, policy_version 252742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:16,068][626795] Updated weights for policy 0, policy_version 252752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:18,040][626795] Updated weights for policy 0, policy_version 252762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:18,977][24592] Fps is (10 sec: 41773.6, 60 sec: 41778.4, 300 sec: 41765.2). Total num frames: 2070667264. Throughput: 0: 10431.3. Samples: 267654030. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:18,978][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:19,982][626795] Updated weights for policy 0, policy_version 252772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:21,964][626795] Updated weights for policy 0, policy_version 252782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:23,759][626795] Updated weights for policy 0, policy_version 252792 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:23,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41642.6, 300 sec: 41737.5). Total num frames: 2070872064. Throughput: 0: 10418.4. Samples: 267716946. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:23,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:25,724][626795] Updated weights for policy 0, policy_version 252802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:27,698][626795] Updated weights for policy 0, policy_version 252812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:28,975][24592] Fps is (10 sec: 41784.3, 60 sec: 41779.1, 300 sec: 41765.3). Total num frames: 2071085056. Throughput: 0: 10431.7. Samples: 267748518. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:28,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:29,608][626795] Updated weights for policy 0, policy_version 252822 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:31,620][626795] Updated weights for policy 0, policy_version 252832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:33,641][626795] Updated weights for policy 0, policy_version 252842 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:33,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.5, 300 sec: 41765.3). Total num frames: 2071298048. Throughput: 0: 10414.0. Samples: 267810942. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:33,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:35,520][626795] Updated weights for policy 0, policy_version 252852 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:37,605][626795] Updated weights for policy 0, policy_version 252862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:38,976][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.1, 300 sec: 41765.3). Total num frames: 2071502848. Throughput: 0: 10430.8. Samples: 267874116. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:38,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:39,487][626795] Updated weights for policy 0, policy_version 252872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:41,455][626795] Updated weights for policy 0, policy_version 252882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:43,342][626795] Updated weights for policy 0, policy_version 252892 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:43,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41642.4, 300 sec: 41765.3). Total num frames: 2071707648. Throughput: 0: 10453.5. Samples: 267905220. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:43,979][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:45,380][626795] Updated weights for policy 0, policy_version 252902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:47,362][626795] Updated weights for policy 0, policy_version 252912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:48,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41642.7, 300 sec: 41765.3). Total num frames: 2071920640. Throughput: 0: 10448.8. Samples: 267967794. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:48,976][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:49,333][626795] Updated weights for policy 0, policy_version 252922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:51,263][626795] Updated weights for policy 0, policy_version 252932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:53,132][626795] Updated weights for policy 0, policy_version 252942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:53,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41642.7, 300 sec: 41737.5). Total num frames: 2072125440. Throughput: 0: 10466.1. Samples: 268030698. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:53,978][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:55,284][626795] Updated weights for policy 0, policy_version 252952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:57,168][626795] Updated weights for policy 0, policy_version 252962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2072330240. Throughput: 0: 10456.0. Samples: 268061370. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:17:58,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:17:59,158][626795] Updated weights for policy 0, policy_version 252972 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:01,239][626795] Updated weights for policy 0, policy_version 252982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:03,083][626795] Updated weights for policy 0, policy_version 252992 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2072543232. Throughput: 0: 10438.7. Samples: 268123758. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:03,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:05,070][626795] Updated weights for policy 0, policy_version 253002 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:07,125][626795] Updated weights for policy 0, policy_version 253012 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:08,946][626795] Updated weights for policy 0, policy_version 253022 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:08,976][24592] Fps is (10 sec: 42595.9, 60 sec: 41778.8, 300 sec: 41765.2). Total num frames: 2072756224. Throughput: 0: 10419.8. Samples: 268185840. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:08,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:10,975][626795] Updated weights for policy 0, policy_version 253032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:12,939][626795] Updated weights for policy 0, policy_version 253042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:13,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41778.8, 300 sec: 41765.3). Total num frames: 2072961024. Throughput: 0: 10423.9. Samples: 268217598. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:13,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:14,961][626795] Updated weights for policy 0, policy_version 253052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:16,858][626795] Updated weights for policy 0, policy_version 253062 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:18,796][626795] Updated weights for policy 0, policy_version 253072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:18,975][24592] Fps is (10 sec: 40962.3, 60 sec: 41643.6, 300 sec: 41737.5). Total num frames: 2073165824. Throughput: 0: 10418.7. Samples: 268279782. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:18,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:20,722][626795] Updated weights for policy 0, policy_version 253082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:22,698][626795] Updated weights for policy 0, policy_version 253092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:23,976][24592] Fps is (10 sec: 41780.0, 60 sec: 41779.0, 300 sec: 41765.3). Total num frames: 2073378816. Throughput: 0: 10425.6. Samples: 268343268. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:23,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:24,714][626795] Updated weights for policy 0, policy_version 253102 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:26,613][626795] Updated weights for policy 0, policy_version 253112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:28,586][626795] Updated weights for policy 0, policy_version 253122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.3, 300 sec: 41765.3). Total num frames: 2073591808. Throughput: 0: 10428.2. Samples: 268374486. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:28,977][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:30,620][626795] Updated weights for policy 0, policy_version 253132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:32,523][626795] Updated weights for policy 0, policy_version 253142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:33,977][24592] Fps is (10 sec: 41775.5, 60 sec: 41641.9, 300 sec: 41765.1). Total num frames: 2073796608. Throughput: 0: 10427.8. Samples: 268437060. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:33,978][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:34,564][626795] Updated weights for policy 0, policy_version 253152 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:36,461][626795] Updated weights for policy 0, policy_version 253162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:38,455][626795] Updated weights for policy 0, policy_version 253172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:38,976][24592] Fps is (10 sec: 40959.9, 60 sec: 41642.7, 300 sec: 41765.3). Total num frames: 2074001408. Throughput: 0: 10411.5. Samples: 268499214. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:38,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:40,577][626795] Updated weights for policy 0, policy_version 253182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:42,382][626795] Updated weights for policy 0, policy_version 253192 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:43,976][24592] Fps is (10 sec: 41783.7, 60 sec: 41779.4, 300 sec: 41765.3). Total num frames: 2074214400. Throughput: 0: 10408.5. Samples: 268529754. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:43,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:44,396][626795] Updated weights for policy 0, policy_version 253202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:46,450][626795] Updated weights for policy 0, policy_version 253212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:48,286][626795] Updated weights for policy 0, policy_version 253222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.7, 300 sec: 41765.3). Total num frames: 2074419200. Throughput: 0: 10416.4. Samples: 268592496. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:48,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:50,249][626795] Updated weights for policy 0, policy_version 253232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:52,288][626795] Updated weights for policy 0, policy_version 253242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:53,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41642.6, 300 sec: 41737.5). Total num frames: 2074624000. Throughput: 0: 10426.0. Samples: 268655004. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:53,976][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:54,158][626795] Updated weights for policy 0, policy_version 253252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:56,219][626795] Updated weights for policy 0, policy_version 253262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:18:58,132][626795] Updated weights for policy 0, policy_version 253272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:58,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41779.0, 300 sec: 41737.5). Total num frames: 2074836992. Throughput: 0: 10419.0. Samples: 268686450. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:18:58,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:00,134][626795] Updated weights for policy 0, policy_version 253282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:01,962][626795] Updated weights for policy 0, policy_version 253292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:03,972][626795] Updated weights for policy 0, policy_version 253302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:03,976][24592] Fps is (10 sec: 42596.4, 60 sec: 41778.9, 300 sec: 41737.5). Total num frames: 2075049984. Throughput: 0: 10427.6. Samples: 268749030. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:03,977][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000253302_2075049984.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:04,063][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000252079_2065031168.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:05,969][626795] Updated weights for policy 0, policy_version 253312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:08,087][626795] Updated weights for policy 0, policy_version 253322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:08,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41642.6, 300 sec: 41737.5). Total num frames: 2075254784. Throughput: 0: 10397.8. Samples: 268811172. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:08,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:09,919][626795] Updated weights for policy 0, policy_version 253332 (0.0044)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:11,928][626795] Updated weights for policy 0, policy_version 253342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:13,957][626795] Updated weights for policy 0, policy_version 253352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:13,975][24592] Fps is (10 sec: 40962.1, 60 sec: 41643.0, 300 sec: 41709.8). Total num frames: 2075459584. Throughput: 0: 10397.9. Samples: 268842390. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:13,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:15,770][626795] Updated weights for policy 0, policy_version 253362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:17,809][626795] Updated weights for policy 0, policy_version 253372 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:18,975][24592] Fps is (10 sec: 40962.5, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2075664384. Throughput: 0: 10394.6. Samples: 268904802. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:18,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:19,770][626795] Updated weights for policy 0, policy_version 253382 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:21,787][626795] Updated weights for policy 0, policy_version 253392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:23,732][626795] Updated weights for policy 0, policy_version 253402 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.9, 300 sec: 41739.8). Total num frames: 2075877376. Throughput: 0: 10409.7. Samples: 268967652. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:23,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:25,719][626795] Updated weights for policy 0, policy_version 253412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:27,635][626795] Updated weights for policy 0, policy_version 253422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41709.8). Total num frames: 2076082176. Throughput: 0: 10410.0. Samples: 268998204. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:28,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:29,660][626795] Updated weights for policy 0, policy_version 253432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:31,577][626795] Updated weights for policy 0, policy_version 253442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:33,502][626795] Updated weights for policy 0, policy_version 253452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:33,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41643.4, 300 sec: 41709.8). Total num frames: 2076295168. Throughput: 0: 10415.7. Samples: 269061204. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:33,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:35,524][626795] Updated weights for policy 0, policy_version 253462 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:37,478][626795] Updated weights for policy 0, policy_version 253472 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:38,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41642.5, 300 sec: 41682.1). Total num frames: 2076499968. Throughput: 0: 10416.6. Samples: 269123754. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:38,979][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:39,376][626795] Updated weights for policy 0, policy_version 253482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:41,412][626795] Updated weights for policy 0, policy_version 253492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:43,339][626795] Updated weights for policy 0, policy_version 253502 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:43,976][24592] Fps is (10 sec: 41776.8, 60 sec: 41642.3, 300 sec: 41737.5). Total num frames: 2076712960. Throughput: 0: 10413.1. Samples: 269155044. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:43,977][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:45,387][626795] Updated weights for policy 0, policy_version 253512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:47,315][626795] Updated weights for policy 0, policy_version 253522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:48,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2076917760. Throughput: 0: 10417.6. Samples: 269217816. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:48,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:49,208][626795] Updated weights for policy 0, policy_version 253532 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:51,199][626795] Updated weights for policy 0, policy_version 253542 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:53,161][626795] Updated weights for policy 0, policy_version 253552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:53,975][24592] Fps is (10 sec: 41781.9, 60 sec: 41779.2, 300 sec: 41709.8). Total num frames: 2077130752. Throughput: 0: 10421.5. Samples: 269280132. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:53,978][24592] Avg episode reward: [(0, '4.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:55,245][626795] Updated weights for policy 0, policy_version 253562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:57,191][626795] Updated weights for policy 0, policy_version 253572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:58,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41642.7, 300 sec: 41709.7). Total num frames: 2077335552. Throughput: 0: 10412.7. Samples: 269310966. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:19:58,979][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:19:59,117][626795] Updated weights for policy 0, policy_version 253582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:01,171][626795] Updated weights for policy 0, policy_version 253592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:03,128][626795] Updated weights for policy 0, policy_version 253602 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:03,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41506.1, 300 sec: 41709.8). Total num frames: 2077540352. Throughput: 0: 10391.9. Samples: 269372442. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:03,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:05,142][626795] Updated weights for policy 0, policy_version 253612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:07,100][626795] Updated weights for policy 0, policy_version 253622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:08,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41506.5, 300 sec: 41682.0). Total num frames: 2077745152. Throughput: 0: 10386.9. Samples: 269435064. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:08,976][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:09,117][626795] Updated weights for policy 0, policy_version 253632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:11,050][626795] Updated weights for policy 0, policy_version 253642 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:13,077][626795] Updated weights for policy 0, policy_version 253652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:13,976][24592] Fps is (10 sec: 40961.2, 60 sec: 41506.0, 300 sec: 41682.0). Total num frames: 2077949952. Throughput: 0: 10383.9. Samples: 269465484. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:13,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:15,080][626795] Updated weights for policy 0, policy_version 253662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:17,017][626795] Updated weights for policy 0, policy_version 253672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:18,961][626795] Updated weights for policy 0, policy_version 253682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:18,986][24592] Fps is (10 sec: 41736.3, 60 sec: 41635.5, 300 sec: 41680.6). Total num frames: 2078162944. Throughput: 0: 10353.9. Samples: 269527236. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:18,986][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:20,938][626795] Updated weights for policy 0, policy_version 253692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:22,830][626795] Updated weights for policy 0, policy_version 253702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:23,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.0, 300 sec: 41682.0). Total num frames: 2078367744. Throughput: 0: 10376.6. Samples: 269590698. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:23,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:24,876][626795] Updated weights for policy 0, policy_version 253712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:26,780][626795] Updated weights for policy 0, policy_version 253722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:28,779][626795] Updated weights for policy 0, policy_version 253732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:28,977][24592] Fps is (10 sec: 41814.1, 60 sec: 41641.3, 300 sec: 41681.8). Total num frames: 2078580736. Throughput: 0: 10374.9. Samples: 269621928. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:28,979][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:30,584][626795] Updated weights for policy 0, policy_version 253742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:32,609][626795] Updated weights for policy 0, policy_version 253752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:33,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41505.9, 300 sec: 41681.9). Total num frames: 2078785536. Throughput: 0: 10392.4. Samples: 269685480. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:33,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:34,539][626795] Updated weights for policy 0, policy_version 253762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:36,536][626795] Updated weights for policy 0, policy_version 253772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:38,389][626795] Updated weights for policy 0, policy_version 253782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:38,975][24592] Fps is (10 sec: 40968.0, 60 sec: 41506.3, 300 sec: 41654.3). Total num frames: 2078990336. Throughput: 0: 10409.1. Samples: 269748540. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:38,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:40,441][626795] Updated weights for policy 0, policy_version 253792 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:42,414][626795] Updated weights for policy 0, policy_version 253802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:43,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41506.6, 300 sec: 41654.2). Total num frames: 2079203328. Throughput: 0: 10401.7. Samples: 269779038. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:43,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:44,404][626795] Updated weights for policy 0, policy_version 253812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:46,386][626795] Updated weights for policy 0, policy_version 253822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:48,371][626795] Updated weights for policy 0, policy_version 253832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41654.2). Total num frames: 2079408128. Throughput: 0: 10411.1. Samples: 269840934. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:48,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:50,449][626795] Updated weights for policy 0, policy_version 253842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:52,379][626795] Updated weights for policy 0, policy_version 253852 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:53,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41506.0, 300 sec: 41682.0). Total num frames: 2079621120. Throughput: 0: 10402.0. Samples: 269903154. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:53,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:54,350][626795] Updated weights for policy 0, policy_version 253862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:56,366][626795] Updated weights for policy 0, policy_version 253872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:20:58,237][626795] Updated weights for policy 0, policy_version 253882 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41642.9, 300 sec: 41709.8). Total num frames: 2079834112. Throughput: 0: 10413.8. Samples: 269934102. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:20:58,978][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:00,249][626795] Updated weights for policy 0, policy_version 253892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:02,133][626795] Updated weights for policy 0, policy_version 253902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:03,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.8, 300 sec: 41682.0). Total num frames: 2080038912. Throughput: 0: 10439.9. Samples: 269996928. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:03,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000253911_2080038912.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000252690_2070036480.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:04,168][626795] Updated weights for policy 0, policy_version 253912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:06,181][626795] Updated weights for policy 0, policy_version 253922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:08,153][626795] Updated weights for policy 0, policy_version 253932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41682.0). Total num frames: 2080243712. Throughput: 0: 10408.2. Samples: 270059064. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:08,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:10,060][626795] Updated weights for policy 0, policy_version 253942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:12,118][626795] Updated weights for policy 0, policy_version 253952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:13,896][626795] Updated weights for policy 0, policy_version 253962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:13,976][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2080456704. Throughput: 0: 10400.0. Samples: 270089910. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:13,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:15,901][626795] Updated weights for policy 0, policy_version 253972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:17,912][626795] Updated weights for policy 0, policy_version 253982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:18,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41649.8, 300 sec: 41654.2). Total num frames: 2080661504. Throughput: 0: 10394.3. Samples: 270153222. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:18,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:19,998][626795] Updated weights for policy 0, policy_version 253992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:21,856][626795] Updated weights for policy 0, policy_version 254002 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:23,788][626795] Updated weights for policy 0, policy_version 254012 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:23,975][24592] Fps is (10 sec: 40961.0, 60 sec: 41642.8, 300 sec: 41654.2). Total num frames: 2080866304. Throughput: 0: 10375.1. Samples: 270215418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:23,980][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:25,772][626795] Updated weights for policy 0, policy_version 254022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:27,718][626795] Updated weights for policy 0, policy_version 254032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:28,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41644.0, 300 sec: 41654.3). Total num frames: 2081079296. Throughput: 0: 10393.7. Samples: 270246756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:28,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:29,717][626795] Updated weights for policy 0, policy_version 254042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:31,677][626795] Updated weights for policy 0, policy_version 254052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:33,510][626795] Updated weights for policy 0, policy_version 254062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:33,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.9, 300 sec: 41654.2). Total num frames: 2081284096. Throughput: 0: 10414.7. Samples: 270309594. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:33,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:35,547][626795] Updated weights for policy 0, policy_version 254072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:37,484][626795] Updated weights for policy 0, policy_version 254082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:38,976][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41626.5). Total num frames: 2081488896. Throughput: 0: 10420.9. Samples: 270372090. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:38,976][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:39,492][626795] Updated weights for policy 0, policy_version 254092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:41,359][626795] Updated weights for policy 0, policy_version 254102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:43,398][626795] Updated weights for policy 0, policy_version 254112 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2081710080. Throughput: 0: 10437.7. Samples: 270403800. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:43,976][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:45,388][626795] Updated weights for policy 0, policy_version 254122 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:47,358][626795] Updated weights for policy 0, policy_version 254132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:48,976][24592] Fps is (10 sec: 42596.1, 60 sec: 41778.8, 300 sec: 41654.2). Total num frames: 2081914880. Throughput: 0: 10439.0. Samples: 270466686. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:48,978][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:49,149][626795] Updated weights for policy 0, policy_version 254142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:51,172][626795] Updated weights for policy 0, policy_version 254152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:53,190][626795] Updated weights for policy 0, policy_version 254162 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:53,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41642.8, 300 sec: 41654.3). Total num frames: 2082119680. Throughput: 0: 10457.9. Samples: 270529668. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:53,976][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:55,164][626795] Updated weights for policy 0, policy_version 254172 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:57,044][626795] Updated weights for policy 0, policy_version 254182 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:58,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41642.2, 300 sec: 41681.9). Total num frames: 2082332672. Throughput: 0: 10471.7. Samples: 270561144. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:21:58,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:21:58,999][626795] Updated weights for policy 0, policy_version 254192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:00,957][626795] Updated weights for policy 0, policy_version 254202 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:02,974][626795] Updated weights for policy 0, policy_version 254212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:03,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.4, 300 sec: 41682.0). Total num frames: 2082545664. Throughput: 0: 10464.4. Samples: 270624120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:03,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:04,898][626795] Updated weights for policy 0, policy_version 254222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:06,853][626795] Updated weights for policy 0, policy_version 254232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:08,777][626795] Updated weights for policy 0, policy_version 254242 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:08,976][24592] Fps is (10 sec: 42601.0, 60 sec: 41915.6, 300 sec: 41709.8). Total num frames: 2082758656. Throughput: 0: 10490.8. Samples: 270687504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:08,976][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:10,749][626795] Updated weights for policy 0, policy_version 254252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:12,643][626795] Updated weights for policy 0, policy_version 254262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.4, 300 sec: 41682.2). Total num frames: 2082963456. Throughput: 0: 10486.3. Samples: 270718638. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:13,978][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:14,714][626795] Updated weights for policy 0, policy_version 254272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:16,739][626795] Updated weights for policy 0, policy_version 254282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:18,575][626795] Updated weights for policy 0, policy_version 254292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:18,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41779.3, 300 sec: 41682.0). Total num frames: 2083168256. Throughput: 0: 10474.5. Samples: 270780948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:18,977][24592] Avg episode reward: [(0, '4.364')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:20,589][626795] Updated weights for policy 0, policy_version 254302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:22,646][626795] Updated weights for policy 0, policy_version 254312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:23,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2083373056. Throughput: 0: 10459.9. Samples: 270842784. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:23,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:24,637][626795] Updated weights for policy 0, policy_version 254322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:26,465][626795] Updated weights for policy 0, policy_version 254332 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:28,528][626795] Updated weights for policy 0, policy_version 254342 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:28,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41654.3). Total num frames: 2083586048. Throughput: 0: 10448.1. Samples: 270873966. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:28,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:30,379][626795] Updated weights for policy 0, policy_version 254352 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:32,356][626795] Updated weights for policy 0, policy_version 254362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:33,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41779.0, 300 sec: 41654.2). Total num frames: 2083790848. Throughput: 0: 10449.6. Samples: 270936918. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:33,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:34,368][626795] Updated weights for policy 0, policy_version 254372 (0.0039)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:36,332][626795] Updated weights for policy 0, policy_version 254382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:38,254][626795] Updated weights for policy 0, policy_version 254392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:38,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.2, 300 sec: 41709.8). Total num frames: 2084012032. Throughput: 0: 10451.1. Samples: 270999966. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:38,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:40,221][626795] Updated weights for policy 0, policy_version 254402 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:42,155][626795] Updated weights for policy 0, policy_version 254412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:43,937][626795] Updated weights for policy 0, policy_version 254422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:43,975][24592] Fps is (10 sec: 43419.1, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2084225024. Throughput: 0: 10457.0. Samples: 271031700. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:43,978][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:45,255][626772] Signal inference workers to stop experience collection... (3600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:45,260][626772] Signal inference workers to resume experience collection... (3600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:45,273][626795] InferenceWorker_p0-w0: stopping experience collection (3600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:45,277][626795] InferenceWorker_p0-w0: resuming experience collection (3600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:46,005][626795] Updated weights for policy 0, policy_version 254432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:47,799][626795] Updated weights for policy 0, policy_version 254442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41916.1, 300 sec: 41709.8). Total num frames: 2084429824. Throughput: 0: 10490.7. Samples: 271096200. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:48,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:49,784][626795] Updated weights for policy 0, policy_version 254452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:51,662][626795] Updated weights for policy 0, policy_version 254462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:53,637][626795] Updated weights for policy 0, policy_version 254472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42052.3, 300 sec: 41737.5). Total num frames: 2084642816. Throughput: 0: 10494.4. Samples: 271159752. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:53,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:55,637][626795] Updated weights for policy 0, policy_version 254482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:57,706][626795] Updated weights for policy 0, policy_version 254492 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:58,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41916.1, 300 sec: 41709.8). Total num frames: 2084847616. Throughput: 0: 10478.2. Samples: 271190160. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:22:58,978][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:22:59,603][626795] Updated weights for policy 0, policy_version 254502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:01,484][626795] Updated weights for policy 0, policy_version 254512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:03,472][626795] Updated weights for policy 0, policy_version 254522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:03,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42052.2, 300 sec: 41737.6). Total num frames: 2085068800. Throughput: 0: 10500.9. Samples: 271253490. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:03,978][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000254525_2085068800.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000253302_2075049984.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:05,460][626795] Updated weights for policy 0, policy_version 254532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:07,425][626795] Updated weights for policy 0, policy_version 254542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:08,975][24592] Fps is (10 sec: 42599.1, 60 sec: 41915.8, 300 sec: 41737.6). Total num frames: 2085273600. Throughput: 0: 10523.7. Samples: 271316352. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:08,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:09,325][626795] Updated weights for policy 0, policy_version 254552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:11,311][626795] Updated weights for policy 0, policy_version 254562 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:13,249][626795] Updated weights for policy 0, policy_version 254572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41915.6, 300 sec: 41737.5). Total num frames: 2085478400. Throughput: 0: 10511.3. Samples: 271346976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:13,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:15,269][626795] Updated weights for policy 0, policy_version 254582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:17,222][626795] Updated weights for policy 0, policy_version 254592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42052.2, 300 sec: 41737.6). Total num frames: 2085691392. Throughput: 0: 10524.9. Samples: 271410534. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:18,977][24592] Avg episode reward: [(0, '5.050')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:19,053][626795] Updated weights for policy 0, policy_version 254602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:21,055][626795] Updated weights for policy 0, policy_version 254612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:22,962][626795] Updated weights for policy 0, policy_version 254622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:23,976][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.2, 300 sec: 41709.8). Total num frames: 2085896192. Throughput: 0: 10514.9. Samples: 271473138. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:23,978][24592] Avg episode reward: [(0, '4.878')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:25,031][626795] Updated weights for policy 0, policy_version 254632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:27,019][626795] Updated weights for policy 0, policy_version 254642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:28,946][626795] Updated weights for policy 0, policy_version 254652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.2, 300 sec: 41737.7). Total num frames: 2086109184. Throughput: 0: 10494.5. Samples: 271503954. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:28,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:30,904][626795] Updated weights for policy 0, policy_version 254662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:32,824][626795] Updated weights for policy 0, policy_version 254672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42189.0, 300 sec: 41765.3). Total num frames: 2086322176. Throughput: 0: 10475.5. Samples: 271567596. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:33,977][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:34,772][626795] Updated weights for policy 0, policy_version 254682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:36,717][626795] Updated weights for policy 0, policy_version 254692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:38,491][626795] Updated weights for policy 0, policy_version 254702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:38,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41915.7, 300 sec: 41737.6). Total num frames: 2086526976. Throughput: 0: 10477.6. Samples: 271631244. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:38,977][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:40,564][626795] Updated weights for policy 0, policy_version 254712 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:42,509][626795] Updated weights for policy 0, policy_version 254722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.7, 300 sec: 41765.3). Total num frames: 2086739968. Throughput: 0: 10496.8. Samples: 271662516. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:43,978][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:44,447][626795] Updated weights for policy 0, policy_version 254732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:46,319][626795] Updated weights for policy 0, policy_version 254742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:48,264][626795] Updated weights for policy 0, policy_version 254752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:48,976][24592] Fps is (10 sec: 42597.0, 60 sec: 42052.1, 300 sec: 41793.0). Total num frames: 2086952960. Throughput: 0: 10507.8. Samples: 271726344. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:48,976][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:50,200][626795] Updated weights for policy 0, policy_version 254762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:52,077][626795] Updated weights for policy 0, policy_version 254772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:53,967][626795] Updated weights for policy 0, policy_version 254782 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:53,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42188.8, 300 sec: 41820.9). Total num frames: 2087174144. Throughput: 0: 10549.6. Samples: 271791084. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:53,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:55,905][626795] Updated weights for policy 0, policy_version 254792 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:23:57,958][626795] Updated weights for policy 0, policy_version 254802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:58,981][24592] Fps is (10 sec: 42575.8, 60 sec: 42185.0, 300 sec: 41792.4). Total num frames: 2087378944. Throughput: 0: 10535.9. Samples: 271821150. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:23:58,982][24592] Avg episode reward: [(0, '4.281')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:00,074][626795] Updated weights for policy 0, policy_version 254812 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:01,791][626795] Updated weights for policy 0, policy_version 254822 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:03,757][626795] Updated weights for policy 0, policy_version 254832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:03,977][24592] Fps is (10 sec: 41778.9, 60 sec: 42052.3, 300 sec: 41820.9). Total num frames: 2087591936. Throughput: 0: 10534.0. Samples: 271884564. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:03,991][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:05,765][626795] Updated weights for policy 0, policy_version 254842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:07,699][626795] Updated weights for policy 0, policy_version 254852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:08,975][24592] Fps is (10 sec: 41802.4, 60 sec: 42052.2, 300 sec: 41820.8). Total num frames: 2087796736. Throughput: 0: 10549.7. Samples: 271947876. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:08,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:09,660][626795] Updated weights for policy 0, policy_version 254862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:11,637][626795] Updated weights for policy 0, policy_version 254872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:13,512][626795] Updated weights for policy 0, policy_version 254882 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:13,975][24592] Fps is (10 sec: 41779.7, 60 sec: 42188.9, 300 sec: 41848.6). Total num frames: 2088009728. Throughput: 0: 10559.1. Samples: 271979112. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:13,976][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:15,372][626795] Updated weights for policy 0, policy_version 254892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:17,384][626795] Updated weights for policy 0, policy_version 254902 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:18,977][24592] Fps is (10 sec: 42592.5, 60 sec: 42187.9, 300 sec: 41848.4). Total num frames: 2088222720. Throughput: 0: 10573.5. Samples: 272043420. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:18,978][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:19,362][626795] Updated weights for policy 0, policy_version 254912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:21,205][626795] Updated weights for policy 0, policy_version 254922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:23,178][626795] Updated weights for policy 0, policy_version 254932 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:23,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42325.4, 300 sec: 41876.4). Total num frames: 2088435712. Throughput: 0: 10556.5. Samples: 272106288. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:23,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:25,141][626795] Updated weights for policy 0, policy_version 254942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:27,022][626795] Updated weights for policy 0, policy_version 254952 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:28,975][24592] Fps is (10 sec: 41785.0, 60 sec: 42188.8, 300 sec: 41848.6). Total num frames: 2088640512. Throughput: 0: 10565.2. Samples: 272137950. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:28,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:29,037][626795] Updated weights for policy 0, policy_version 254962 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:31,002][626795] Updated weights for policy 0, policy_version 254972 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:32,935][626795] Updated weights for policy 0, policy_version 254982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42188.8, 300 sec: 41876.4). Total num frames: 2088853504. Throughput: 0: 10543.0. Samples: 272200776. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:33,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:34,915][626795] Updated weights for policy 0, policy_version 254992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:36,846][626795] Updated weights for policy 0, policy_version 255002 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:38,764][626795] Updated weights for policy 0, policy_version 255012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:38,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42325.4, 300 sec: 41876.5). Total num frames: 2089066496. Throughput: 0: 10507.2. Samples: 272263908. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:38,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:40,758][626795] Updated weights for policy 0, policy_version 255022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:42,667][626795] Updated weights for policy 0, policy_version 255032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:43,976][24592] Fps is (10 sec: 41778.1, 60 sec: 42188.6, 300 sec: 41876.4). Total num frames: 2089271296. Throughput: 0: 10545.4. Samples: 272295636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:43,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:44,696][626795] Updated weights for policy 0, policy_version 255042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:46,580][626795] Updated weights for policy 0, policy_version 255052 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:48,425][626795] Updated weights for policy 0, policy_version 255062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:48,976][24592] Fps is (10 sec: 41778.5, 60 sec: 42188.9, 300 sec: 41876.4). Total num frames: 2089484288. Throughput: 0: 10542.8. Samples: 272358990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:48,977][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:50,348][626795] Updated weights for policy 0, policy_version 255072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:52,365][626795] Updated weights for policy 0, policy_version 255082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:53,976][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.1, 300 sec: 41904.2). Total num frames: 2089697280. Throughput: 0: 10548.0. Samples: 272422536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:53,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:54,320][626795] Updated weights for policy 0, policy_version 255092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:56,271][626795] Updated weights for policy 0, policy_version 255102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:24:58,177][626795] Updated weights for policy 0, policy_version 255112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:58,975][24592] Fps is (10 sec: 42599.0, 60 sec: 42192.8, 300 sec: 41932.0). Total num frames: 2089910272. Throughput: 0: 10541.2. Samples: 272453466. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:24:58,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:00,137][626795] Updated weights for policy 0, policy_version 255122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:02,151][626795] Updated weights for policy 0, policy_version 255132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:03,975][24592] Fps is (10 sec: 41779.6, 60 sec: 42052.3, 300 sec: 41931.9). Total num frames: 2090115072. Throughput: 0: 10504.1. Samples: 272516088. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:03,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000255141_2090115072.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:04,077][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000253911_2080038912.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:04,228][626795] Updated weights for policy 0, policy_version 255142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:06,122][626795] Updated weights for policy 0, policy_version 255152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:08,017][626795] Updated weights for policy 0, policy_version 255162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:08,975][24592] Fps is (10 sec: 40959.5, 60 sec: 42052.3, 300 sec: 41931.9). Total num frames: 2090319872. Throughput: 0: 10502.1. Samples: 272578884. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:08,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:10,013][626795] Updated weights for policy 0, policy_version 255172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:12,026][626795] Updated weights for policy 0, policy_version 255182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:13,892][626795] Updated weights for policy 0, policy_version 255192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:13,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42188.7, 300 sec: 41961.2). Total num frames: 2090541056. Throughput: 0: 10494.5. Samples: 272610204. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:13,978][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:15,850][626795] Updated weights for policy 0, policy_version 255202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:17,799][626795] Updated weights for policy 0, policy_version 255212 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:18,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42053.3, 300 sec: 41959.7). Total num frames: 2090745856. Throughput: 0: 10515.8. Samples: 272673990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:18,977][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:19,715][626795] Updated weights for policy 0, policy_version 255222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:21,707][626795] Updated weights for policy 0, policy_version 255232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:23,573][626795] Updated weights for policy 0, policy_version 255242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:23,975][24592] Fps is (10 sec: 41779.4, 60 sec: 42052.3, 300 sec: 41960.0). Total num frames: 2090958848. Throughput: 0: 10515.3. Samples: 272737098. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:23,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:25,566][626795] Updated weights for policy 0, policy_version 255252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:27,488][626795] Updated weights for policy 0, policy_version 255262 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42052.3, 300 sec: 41959.8). Total num frames: 2091163648. Throughput: 0: 10498.7. Samples: 272768076. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:28,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:29,492][626795] Updated weights for policy 0, policy_version 255272 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:31,487][626795] Updated weights for policy 0, policy_version 255282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:33,361][626795] Updated weights for policy 0, policy_version 255292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:33,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42052.1, 300 sec: 41987.4). Total num frames: 2091376640. Throughput: 0: 10498.5. Samples: 272831424. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:33,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:35,384][626795] Updated weights for policy 0, policy_version 255302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:37,263][626795] Updated weights for policy 0, policy_version 255312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2091581440. Throughput: 0: 10474.8. Samples: 272893902. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:38,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:39,290][626795] Updated weights for policy 0, policy_version 255322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:41,232][626795] Updated weights for policy 0, policy_version 255332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:43,226][626795] Updated weights for policy 0, policy_version 255342 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:43,976][24592] Fps is (10 sec: 40960.1, 60 sec: 41915.8, 300 sec: 41959.7). Total num frames: 2091786240. Throughput: 0: 10474.1. Samples: 272924802. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:43,978][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:45,153][626795] Updated weights for policy 0, policy_version 255352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:47,067][626795] Updated weights for policy 0, policy_version 255362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:48,976][24592] Fps is (10 sec: 41776.6, 60 sec: 41915.4, 300 sec: 41959.6). Total num frames: 2091999232. Throughput: 0: 10484.1. Samples: 272987880. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:48,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:49,101][626795] Updated weights for policy 0, policy_version 255372 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:51,028][626795] Updated weights for policy 0, policy_version 255382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:53,032][626795] Updated weights for policy 0, policy_version 255392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:53,976][24592] Fps is (10 sec: 42598.0, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2092212224. Throughput: 0: 10474.5. Samples: 273050238. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:53,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:55,006][626795] Updated weights for policy 0, policy_version 255402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:56,962][626795] Updated weights for policy 0, policy_version 255412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:25:58,912][626795] Updated weights for policy 0, policy_version 255422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:58,975][24592] Fps is (10 sec: 41781.9, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2092417024. Throughput: 0: 10471.7. Samples: 273081432. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:25:58,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:00,911][626795] Updated weights for policy 0, policy_version 255432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:02,852][626795] Updated weights for policy 0, policy_version 255442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:03,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41915.8, 300 sec: 41987.5). Total num frames: 2092630016. Throughput: 0: 10457.6. Samples: 273144582. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:03,976][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:04,731][626795] Updated weights for policy 0, policy_version 255452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:06,836][626795] Updated weights for policy 0, policy_version 255462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:08,622][626795] Updated weights for policy 0, policy_version 255472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:08,976][24592] Fps is (10 sec: 41775.2, 60 sec: 41915.2, 300 sec: 41959.6). Total num frames: 2092834816. Throughput: 0: 10448.1. Samples: 273207270. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:08,980][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:10,717][626795] Updated weights for policy 0, policy_version 255482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:12,619][626795] Updated weights for policy 0, policy_version 255492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:13,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41642.6, 300 sec: 41959.7). Total num frames: 2093039616. Throughput: 0: 10439.0. Samples: 273237834. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:13,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:14,669][626795] Updated weights for policy 0, policy_version 255502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:16,561][626795] Updated weights for policy 0, policy_version 255512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:18,538][626795] Updated weights for policy 0, policy_version 255522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:18,975][24592] Fps is (10 sec: 41783.1, 60 sec: 41779.2, 300 sec: 41987.5). Total num frames: 2093252608. Throughput: 0: 10419.4. Samples: 273300294. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:18,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:20,534][626795] Updated weights for policy 0, policy_version 255532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:22,487][626795] Updated weights for policy 0, policy_version 255542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:23,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41642.6, 300 sec: 41959.7). Total num frames: 2093457408. Throughput: 0: 10438.0. Samples: 273363612. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:23,978][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:24,410][626795] Updated weights for policy 0, policy_version 255552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:26,348][626795] Updated weights for policy 0, policy_version 255562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:28,298][626795] Updated weights for policy 0, policy_version 255572 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.2, 300 sec: 41987.5). Total num frames: 2093670400. Throughput: 0: 10441.0. Samples: 273394644. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:28,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:30,232][626795] Updated weights for policy 0, policy_version 255582 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:32,129][626795] Updated weights for policy 0, policy_version 255592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:33,976][24592] Fps is (10 sec: 42597.8, 60 sec: 41779.2, 300 sec: 42015.2). Total num frames: 2093883392. Throughput: 0: 10443.7. Samples: 273457842. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:33,977][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:34,093][626795] Updated weights for policy 0, policy_version 255602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:36,053][626795] Updated weights for policy 0, policy_version 255612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:38,117][626795] Updated weights for policy 0, policy_version 255622 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:38,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2094088192. Throughput: 0: 10455.0. Samples: 273520710. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:38,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:40,083][626795] Updated weights for policy 0, policy_version 255632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:42,077][626795] Updated weights for policy 0, policy_version 255642 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:43,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41778.9, 300 sec: 41959.7). Total num frames: 2094292992. Throughput: 0: 10445.7. Samples: 273551496. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:43,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:44,059][626795] Updated weights for policy 0, policy_version 255652 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:45,862][626795] Updated weights for policy 0, policy_version 255662 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:47,919][626795] Updated weights for policy 0, policy_version 255672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.6, 300 sec: 41987.5). Total num frames: 2094505984. Throughput: 0: 10438.9. Samples: 273614334. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:48,979][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:49,849][626795] Updated weights for policy 0, policy_version 255682 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:51,816][626795] Updated weights for policy 0, policy_version 255692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:53,837][626795] Updated weights for policy 0, policy_version 255702 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:53,975][24592] Fps is (10 sec: 42601.1, 60 sec: 41779.4, 300 sec: 41987.6). Total num frames: 2094718976. Throughput: 0: 10440.0. Samples: 273677058. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:53,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:55,730][626795] Updated weights for policy 0, policy_version 255712 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:57,619][626795] Updated weights for policy 0, policy_version 255722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:58,991][24592] Fps is (10 sec: 41714.2, 60 sec: 41768.3, 300 sec: 41957.5). Total num frames: 2094923776. Throughput: 0: 10456.8. Samples: 273708552. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:26:58,992][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:26:59,599][626795] Updated weights for policy 0, policy_version 255732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:01,509][626795] Updated weights for policy 0, policy_version 255742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:03,431][626795] Updated weights for policy 0, policy_version 255752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2095136768. Throughput: 0: 10481.1. Samples: 273771942. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:03,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000255754_2095136768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:04,090][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000254525_2085068800.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:05,423][626795] Updated weights for policy 0, policy_version 255762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:07,388][626795] Updated weights for policy 0, policy_version 255772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:08,976][24592] Fps is (10 sec: 42664.4, 60 sec: 41916.2, 300 sec: 41987.4). Total num frames: 2095349760. Throughput: 0: 10464.2. Samples: 273834504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:08,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:09,445][626795] Updated weights for policy 0, policy_version 255782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:11,387][626795] Updated weights for policy 0, policy_version 255792 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:13,433][626795] Updated weights for policy 0, policy_version 255802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:13,977][24592] Fps is (10 sec: 41771.8, 60 sec: 41914.6, 300 sec: 41987.2). Total num frames: 2095554560. Throughput: 0: 10458.8. Samples: 273865308. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:13,978][24592] Avg episode reward: [(0, '4.493')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:15,356][626795] Updated weights for policy 0, policy_version 255812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:17,292][626795] Updated weights for policy 0, policy_version 255822 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:18,977][24592] Fps is (10 sec: 40956.0, 60 sec: 41778.4, 300 sec: 41987.3). Total num frames: 2095759360. Throughput: 0: 10430.0. Samples: 273927204. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:18,978][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:19,391][626795] Updated weights for policy 0, policy_version 255832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:21,285][626795] Updated weights for policy 0, policy_version 255842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:23,202][626795] Updated weights for policy 0, policy_version 255852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:23,976][24592] Fps is (10 sec: 41785.1, 60 sec: 41915.5, 300 sec: 41987.4). Total num frames: 2095972352. Throughput: 0: 10434.7. Samples: 273990276. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:23,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:25,252][626795] Updated weights for policy 0, policy_version 255862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:27,141][626795] Updated weights for policy 0, policy_version 255872 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:28,975][24592] Fps is (10 sec: 41783.8, 60 sec: 41779.1, 300 sec: 41987.5). Total num frames: 2096177152. Throughput: 0: 10445.5. Samples: 274021536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:28,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:29,037][626795] Updated weights for policy 0, policy_version 255882 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:31,065][626795] Updated weights for policy 0, policy_version 255892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:32,955][626795] Updated weights for policy 0, policy_version 255902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:33,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41779.4, 300 sec: 41959.7). Total num frames: 2096390144. Throughput: 0: 10450.4. Samples: 274084602. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:33,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:34,958][626795] Updated weights for policy 0, policy_version 255912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:36,891][626795] Updated weights for policy 0, policy_version 255922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:38,751][626795] Updated weights for policy 0, policy_version 255932 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:38,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41779.0, 300 sec: 41931.9). Total num frames: 2096594944. Throughput: 0: 10464.8. Samples: 274147980. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:38,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:40,747][626795] Updated weights for policy 0, policy_version 255942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:42,800][626795] Updated weights for policy 0, policy_version 255952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:43,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41779.6, 300 sec: 41931.9). Total num frames: 2096799744. Throughput: 0: 10451.4. Samples: 274178700. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:43,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:44,705][626795] Updated weights for policy 0, policy_version 255962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:46,771][626795] Updated weights for policy 0, policy_version 255972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:48,725][626795] Updated weights for policy 0, policy_version 255982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:48,975][24592] Fps is (10 sec: 41780.5, 60 sec: 41779.2, 300 sec: 41931.9). Total num frames: 2097012736. Throughput: 0: 10422.9. Samples: 274240974. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:48,976][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:50,709][626795] Updated weights for policy 0, policy_version 255992 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:52,614][626795] Updated weights for policy 0, policy_version 256002 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:53,976][24592] Fps is (10 sec: 41776.7, 60 sec: 41642.2, 300 sec: 41931.9). Total num frames: 2097217536. Throughput: 0: 10413.4. Samples: 274303110. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:53,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:54,605][626795] Updated weights for policy 0, policy_version 256012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:56,684][626795] Updated weights for policy 0, policy_version 256022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:27:58,490][626795] Updated weights for policy 0, policy_version 256032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:58,981][24592] Fps is (10 sec: 40937.4, 60 sec: 41649.7, 300 sec: 41875.6). Total num frames: 2097422336. Throughput: 0: 10423.4. Samples: 274334400. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:27:58,982][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:00,635][626795] Updated weights for policy 0, policy_version 256042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:02,460][626795] Updated weights for policy 0, policy_version 256052 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:03,975][24592] Fps is (10 sec: 41781.9, 60 sec: 41642.7, 300 sec: 41904.2). Total num frames: 2097635328. Throughput: 0: 10445.3. Samples: 274397232. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:03,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:04,508][626795] Updated weights for policy 0, policy_version 256062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:06,357][626795] Updated weights for policy 0, policy_version 256072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:08,398][626795] Updated weights for policy 0, policy_version 256082 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:08,976][24592] Fps is (10 sec: 42621.3, 60 sec: 41642.7, 300 sec: 41931.9). Total num frames: 2097848320. Throughput: 0: 10445.6. Samples: 274460328. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:08,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:10,352][626795] Updated weights for policy 0, policy_version 256092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:12,237][626795] Updated weights for policy 0, policy_version 256102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:13,976][24592] Fps is (10 sec: 41776.9, 60 sec: 41643.6, 300 sec: 41904.1). Total num frames: 2098053120. Throughput: 0: 10448.2. Samples: 274491708. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:13,978][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:14,198][626795] Updated weights for policy 0, policy_version 256112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:16,210][626795] Updated weights for policy 0, policy_version 256122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:18,157][626795] Updated weights for policy 0, policy_version 256132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:18,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41780.0, 300 sec: 41931.9). Total num frames: 2098266112. Throughput: 0: 10440.3. Samples: 274554414. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:18,977][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:20,043][626795] Updated weights for policy 0, policy_version 256142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:22,087][626795] Updated weights for policy 0, policy_version 256152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:23,975][24592] Fps is (10 sec: 41781.3, 60 sec: 41642.9, 300 sec: 41904.2). Total num frames: 2098470912. Throughput: 0: 10418.5. Samples: 274616808. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:23,978][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:24,049][626795] Updated weights for policy 0, policy_version 256162 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:25,930][626795] Updated weights for policy 0, policy_version 256172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:27,889][626795] Updated weights for policy 0, policy_version 256182 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:28,981][24592] Fps is (10 sec: 41755.1, 60 sec: 41775.2, 300 sec: 41903.3). Total num frames: 2098683904. Throughput: 0: 10431.7. Samples: 274648188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:28,982][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:29,849][626795] Updated weights for policy 0, policy_version 256192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:31,783][626795] Updated weights for policy 0, policy_version 256202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:33,769][626795] Updated weights for policy 0, policy_version 256212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41931.9). Total num frames: 2098896896. Throughput: 0: 10451.5. Samples: 274711290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:33,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:35,741][626795] Updated weights for policy 0, policy_version 256222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:37,590][626795] Updated weights for policy 0, policy_version 256232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:38,975][24592] Fps is (10 sec: 42622.8, 60 sec: 41916.0, 300 sec: 41931.9). Total num frames: 2099109888. Throughput: 0: 10473.3. Samples: 274774404. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:38,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:39,646][626795] Updated weights for policy 0, policy_version 256242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:41,616][626795] Updated weights for policy 0, policy_version 256252 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:43,508][626795] Updated weights for policy 0, policy_version 256262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:43,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41779.2, 300 sec: 41876.4). Total num frames: 2099306496. Throughput: 0: 10475.2. Samples: 274805724. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:43,978][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:45,453][626795] Updated weights for policy 0, policy_version 256272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:47,455][626795] Updated weights for policy 0, policy_version 256282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:48,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41779.0, 300 sec: 41848.6). Total num frames: 2099519488. Throughput: 0: 10472.7. Samples: 274868508. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:48,977][24592] Avg episode reward: [(0, '5.088')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:49,471][626795] Updated weights for policy 0, policy_version 256292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:51,436][626795] Updated weights for policy 0, policy_version 256302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:53,439][626795] Updated weights for policy 0, policy_version 256312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:53,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.6, 300 sec: 41849.4). Total num frames: 2099724288. Throughput: 0: 10448.2. Samples: 274930494. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:53,977][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:55,351][626795] Updated weights for policy 0, policy_version 256322 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:57,396][626795] Updated weights for policy 0, policy_version 256332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:58,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41919.7, 300 sec: 41848.6). Total num frames: 2099937280. Throughput: 0: 10439.7. Samples: 274961490. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:28:58,977][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:28:59,213][626795] Updated weights for policy 0, policy_version 256342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:01,166][626795] Updated weights for policy 0, policy_version 256352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:03,192][626795] Updated weights for policy 0, policy_version 256362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:03,976][24592] Fps is (10 sec: 42596.7, 60 sec: 41915.5, 300 sec: 41876.4). Total num frames: 2100150272. Throughput: 0: 10455.1. Samples: 275024898. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:03,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000256366_2100150272.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:04,100][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000255141_2090115072.pth\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:04,691][626772] Signal inference workers to stop experience collection... (3650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:04,692][626772] Signal inference workers to resume experience collection... (3650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:04,699][626795] InferenceWorker_p0-w0: stopping experience collection (3650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:04,700][626795] InferenceWorker_p0-w0: resuming experience collection (3650 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:05,195][626795] Updated weights for policy 0, policy_version 256372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:07,156][626795] Updated weights for policy 0, policy_version 256382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.3, 300 sec: 41848.6). Total num frames: 2100355072. Throughput: 0: 10439.9. Samples: 275086602. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:08,977][24592] Avg episode reward: [(0, '4.414')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:09,156][626795] Updated weights for policy 0, policy_version 256392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:11,133][626795] Updated weights for policy 0, policy_version 256402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:13,063][626795] Updated weights for policy 0, policy_version 256412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:13,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41779.2, 300 sec: 41821.0). Total num frames: 2100559872. Throughput: 0: 10426.6. Samples: 275117328. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:13,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:15,109][626795] Updated weights for policy 0, policy_version 256422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:16,970][626795] Updated weights for policy 0, policy_version 256432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:18,976][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2100764672. Throughput: 0: 10409.2. Samples: 275179704. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:18,977][24592] Avg episode reward: [(0, '4.354')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:18,996][626795] Updated weights for policy 0, policy_version 256442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:20,991][626795] Updated weights for policy 0, policy_version 256452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:22,959][626795] Updated weights for policy 0, policy_version 256462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:23,981][24592] Fps is (10 sec: 41756.4, 60 sec: 41775.1, 300 sec: 41820.0). Total num frames: 2100977664. Throughput: 0: 10394.0. Samples: 275242194. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:23,983][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:24,983][626795] Updated weights for policy 0, policy_version 256472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:26,858][626795] Updated weights for policy 0, policy_version 256482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:28,856][626795] Updated weights for policy 0, policy_version 256492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41783.2, 300 sec: 41820.8). Total num frames: 2101190656. Throughput: 0: 10379.6. Samples: 275272806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:28,976][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:30,895][626795] Updated weights for policy 0, policy_version 256502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:32,723][626795] Updated weights for policy 0, policy_version 256512 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:33,975][24592] Fps is (10 sec: 41804.0, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2101395456. Throughput: 0: 10396.2. Samples: 275336334. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:33,977][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:34,722][626795] Updated weights for policy 0, policy_version 256522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:36,613][626795] Updated weights for policy 0, policy_version 256532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:38,687][626795] Updated weights for policy 0, policy_version 256542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.6, 300 sec: 41820.9). Total num frames: 2101608448. Throughput: 0: 10428.2. Samples: 275399766. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:38,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:40,548][626795] Updated weights for policy 0, policy_version 256552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:42,449][626795] Updated weights for policy 0, policy_version 256562 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2101813248. Throughput: 0: 10425.7. Samples: 275430648. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:43,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:44,458][626795] Updated weights for policy 0, policy_version 256572 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:46,490][626795] Updated weights for policy 0, policy_version 256582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:48,289][626795] Updated weights for policy 0, policy_version 256592 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:48,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.5, 300 sec: 41793.1). Total num frames: 2102026240. Throughput: 0: 10418.4. Samples: 275493720. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:48,977][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:50,293][626795] Updated weights for policy 0, policy_version 256602 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:52,218][626795] Updated weights for policy 0, policy_version 256612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:53,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.1, 300 sec: 41765.3). Total num frames: 2102231040. Throughput: 0: 10430.8. Samples: 275555988. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:53,976][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:54,295][626795] Updated weights for policy 0, policy_version 256622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:56,271][626795] Updated weights for policy 0, policy_version 256632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:29:58,253][626795] Updated weights for policy 0, policy_version 256642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41642.7, 300 sec: 41765.3). Total num frames: 2102435840. Throughput: 0: 10429.6. Samples: 275586654. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:29:58,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:00,229][626795] Updated weights for policy 0, policy_version 256652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:02,237][626795] Updated weights for policy 0, policy_version 256662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:03,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41643.0, 300 sec: 41793.1). Total num frames: 2102648832. Throughput: 0: 10432.8. Samples: 275649180. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:03,976][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:04,088][626795] Updated weights for policy 0, policy_version 256672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:06,146][626795] Updated weights for policy 0, policy_version 256682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:08,007][626795] Updated weights for policy 0, policy_version 256692 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2102853632. Throughput: 0: 10448.2. Samples: 275712300. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:08,979][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:10,038][626795] Updated weights for policy 0, policy_version 256702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:11,871][626795] Updated weights for policy 0, policy_version 256712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:13,823][626795] Updated weights for policy 0, policy_version 256722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:13,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41779.3, 300 sec: 41765.3). Total num frames: 2103066624. Throughput: 0: 10473.8. Samples: 275744130. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:13,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:15,839][626795] Updated weights for policy 0, policy_version 256732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:17,845][626795] Updated weights for policy 0, policy_version 256742 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41737.5). Total num frames: 2103271424. Throughput: 0: 10444.1. Samples: 275806320. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:18,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:19,905][626795] Updated weights for policy 0, policy_version 256752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:21,907][626795] Updated weights for policy 0, policy_version 256762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:23,731][626795] Updated weights for policy 0, policy_version 256772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:23,975][24592] Fps is (10 sec: 40961.3, 60 sec: 41646.7, 300 sec: 41737.5). Total num frames: 2103476224. Throughput: 0: 10409.2. Samples: 275868180. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:23,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:25,793][626795] Updated weights for policy 0, policy_version 256782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:27,852][626795] Updated weights for policy 0, policy_version 256792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:28,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41506.1, 300 sec: 41709.8). Total num frames: 2103681024. Throughput: 0: 10396.4. Samples: 275898486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:28,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:29,864][626795] Updated weights for policy 0, policy_version 256802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:31,716][626795] Updated weights for policy 0, policy_version 256812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:33,764][626795] Updated weights for policy 0, policy_version 256822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:33,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2103894016. Throughput: 0: 10382.1. Samples: 275960916. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:33,978][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:35,626][626795] Updated weights for policy 0, policy_version 256832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:37,638][626795] Updated weights for policy 0, policy_version 256842 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:38,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41505.9, 300 sec: 41737.5). Total num frames: 2104098816. Throughput: 0: 10395.4. Samples: 276023784. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:38,978][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:39,527][626795] Updated weights for policy 0, policy_version 256852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:41,505][626795] Updated weights for policy 0, policy_version 256862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:43,478][626795] Updated weights for policy 0, policy_version 256872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2104311808. Throughput: 0: 10414.1. Samples: 276055290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:43,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:45,463][626795] Updated weights for policy 0, policy_version 256882 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:47,375][626795] Updated weights for policy 0, policy_version 256892 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:48,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2104524800. Throughput: 0: 10432.1. Samples: 276118626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:48,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:49,350][626795] Updated weights for policy 0, policy_version 256902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:51,365][626795] Updated weights for policy 0, policy_version 256912 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:53,220][626795] Updated weights for policy 0, policy_version 256922 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41642.7, 300 sec: 41737.5). Total num frames: 2104729600. Throughput: 0: 10406.9. Samples: 276180612. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:53,976][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:55,298][626795] Updated weights for policy 0, policy_version 256932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:57,314][626795] Updated weights for policy 0, policy_version 256942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:58,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41778.9, 300 sec: 41737.5). Total num frames: 2104942592. Throughput: 0: 10394.5. Samples: 276211884. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:30:58,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:30:59,159][626795] Updated weights for policy 0, policy_version 256952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:01,114][626795] Updated weights for policy 0, policy_version 256962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:03,068][626795] Updated weights for policy 0, policy_version 256972 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.6, 300 sec: 41737.7). Total num frames: 2105147392. Throughput: 0: 10405.4. Samples: 276274566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:03,976][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000256976_2105147392.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:04,100][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000255754_2095136768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:05,169][626795] Updated weights for policy 0, policy_version 256982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:07,086][626795] Updated weights for policy 0, policy_version 256992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:08,960][626795] Updated weights for policy 0, policy_version 257002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:08,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2105360384. Throughput: 0: 10431.5. Samples: 276337596. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:08,976][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:10,899][626795] Updated weights for policy 0, policy_version 257012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:12,953][626795] Updated weights for policy 0, policy_version 257022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:13,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41642.9, 300 sec: 41737.5). Total num frames: 2105565184. Throughput: 0: 10458.8. Samples: 276369132. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:13,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:14,873][626795] Updated weights for policy 0, policy_version 257032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:16,776][626795] Updated weights for policy 0, policy_version 257042 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:18,722][626795] Updated weights for policy 0, policy_version 257052 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2105778176. Throughput: 0: 10471.1. Samples: 276432114. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:18,976][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:20,727][626795] Updated weights for policy 0, policy_version 257062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:22,588][626795] Updated weights for policy 0, policy_version 257072 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:23,976][24592] Fps is (10 sec: 41776.8, 60 sec: 41778.8, 300 sec: 41737.5). Total num frames: 2105982976. Throughput: 0: 10473.6. Samples: 276495096. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:23,978][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:24,647][626795] Updated weights for policy 0, policy_version 257082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:26,527][626795] Updated weights for policy 0, policy_version 257092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:28,453][626795] Updated weights for policy 0, policy_version 257102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:28,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41915.8, 300 sec: 41737.6). Total num frames: 2106195968. Throughput: 0: 10469.1. Samples: 276526398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:28,978][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:30,492][626795] Updated weights for policy 0, policy_version 257112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:32,521][626795] Updated weights for policy 0, policy_version 257122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:33,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41779.2, 300 sec: 41737.6). Total num frames: 2106400768. Throughput: 0: 10442.9. Samples: 276588558. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:33,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:34,576][626795] Updated weights for policy 0, policy_version 257132 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:36,508][626795] Updated weights for policy 0, policy_version 257142 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:38,307][626795] Updated weights for policy 0, policy_version 257152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41916.0, 300 sec: 41765.4). Total num frames: 2106613760. Throughput: 0: 10446.4. Samples: 276650700. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:38,977][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:40,247][626795] Updated weights for policy 0, policy_version 257162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:42,326][626795] Updated weights for policy 0, policy_version 257172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.2, 300 sec: 41737.6). Total num frames: 2106818560. Throughput: 0: 10453.2. Samples: 276682272. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:43,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:44,204][626795] Updated weights for policy 0, policy_version 257182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:46,169][626795] Updated weights for policy 0, policy_version 257192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:48,098][626795] Updated weights for policy 0, policy_version 257202 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41737.5). Total num frames: 2107031552. Throughput: 0: 10475.5. Samples: 276745962. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:48,978][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:50,086][626795] Updated weights for policy 0, policy_version 257212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:52,019][626795] Updated weights for policy 0, policy_version 257222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:53,918][626795] Updated weights for policy 0, policy_version 257232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:53,976][24592] Fps is (10 sec: 42596.4, 60 sec: 41915.4, 300 sec: 41767.5). Total num frames: 2107244544. Throughput: 0: 10482.3. Samples: 276809304. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:53,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:55,916][626795] Updated weights for policy 0, policy_version 257242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:57,715][626795] Updated weights for policy 0, policy_version 257252 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.5, 300 sec: 41737.6). Total num frames: 2107449344. Throughput: 0: 10476.5. Samples: 276840576. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:31:58,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:31:59,788][626795] Updated weights for policy 0, policy_version 257262 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:01,745][626795] Updated weights for policy 0, policy_version 257272 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:03,641][626795] Updated weights for policy 0, policy_version 257282 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:03,976][24592] Fps is (10 sec: 40961.3, 60 sec: 41779.1, 300 sec: 41709.8). Total num frames: 2107654144. Throughput: 0: 10467.8. Samples: 276903168. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:03,976][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:05,710][626795] Updated weights for policy 0, policy_version 257292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:07,744][626795] Updated weights for policy 0, policy_version 257302 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:08,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.1, 300 sec: 41737.8). Total num frames: 2107867136. Throughput: 0: 10445.7. Samples: 276965148. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:08,978][24592] Avg episode reward: [(0, '4.318')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:09,737][626795] Updated weights for policy 0, policy_version 257312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:11,681][626795] Updated weights for policy 0, policy_version 257322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:13,603][626795] Updated weights for policy 0, policy_version 257332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:13,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41779.2, 300 sec: 41737.7). Total num frames: 2108071936. Throughput: 0: 10441.9. Samples: 276996282. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:13,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:15,541][626795] Updated weights for policy 0, policy_version 257342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:17,416][626795] Updated weights for policy 0, policy_version 257352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:18,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41915.7, 300 sec: 41765.4). Total num frames: 2108293120. Throughput: 0: 10467.2. Samples: 277059582. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:18,976][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:19,498][626795] Updated weights for policy 0, policy_version 257362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:21,438][626795] Updated weights for policy 0, policy_version 257372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:23,329][626795] Updated weights for policy 0, policy_version 257382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41916.1, 300 sec: 41765.3). Total num frames: 2108497920. Throughput: 0: 10483.5. Samples: 277122456. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:23,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:25,336][626795] Updated weights for policy 0, policy_version 257392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:27,169][626795] Updated weights for policy 0, policy_version 257402 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:28,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41779.2, 300 sec: 41737.5). Total num frames: 2108702720. Throughput: 0: 10477.0. Samples: 277153740. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:28,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:29,283][626795] Updated weights for policy 0, policy_version 257412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:31,110][626795] Updated weights for policy 0, policy_version 257422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:33,043][626795] Updated weights for policy 0, policy_version 257432 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.8, 300 sec: 41765.4). Total num frames: 2108915712. Throughput: 0: 10463.5. Samples: 277216818. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:33,980][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:35,100][626795] Updated weights for policy 0, policy_version 257442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:37,124][626795] Updated weights for policy 0, policy_version 257452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:38,848][626795] Updated weights for policy 0, policy_version 257462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:38,976][24592] Fps is (10 sec: 42597.2, 60 sec: 41915.5, 300 sec: 41793.0). Total num frames: 2109128704. Throughput: 0: 10451.8. Samples: 277279632. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:38,978][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:40,890][626795] Updated weights for policy 0, policy_version 257472 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:42,879][626795] Updated weights for policy 0, policy_version 257482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:43,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41915.6, 300 sec: 41765.3). Total num frames: 2109333504. Throughput: 0: 10442.7. Samples: 277310502. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:43,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:44,929][626795] Updated weights for policy 0, policy_version 257492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:47,074][626795] Updated weights for policy 0, policy_version 257502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:48,975][24592] Fps is (10 sec: 39323.4, 60 sec: 41506.2, 300 sec: 41709.9). Total num frames: 2109521920. Throughput: 0: 10388.5. Samples: 277370646. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:48,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:49,192][626795] Updated weights for policy 0, policy_version 257512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:51,137][626795] Updated weights for policy 0, policy_version 257522 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:53,072][626795] Updated weights for policy 0, policy_version 257532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:53,975][24592] Fps is (10 sec: 40141.4, 60 sec: 41506.4, 300 sec: 41738.3). Total num frames: 2109734912. Throughput: 0: 10381.0. Samples: 277432290. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:53,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:55,105][626795] Updated weights for policy 0, policy_version 257542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:56,937][626795] Updated weights for policy 0, policy_version 257552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:32:58,919][626795] Updated weights for policy 0, policy_version 257562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:58,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41642.7, 300 sec: 41737.5). Total num frames: 2109947904. Throughput: 0: 10376.0. Samples: 277463202. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:32:58,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:00,993][626795] Updated weights for policy 0, policy_version 257572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:02,855][626795] Updated weights for policy 0, policy_version 257582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:03,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2110152704. Throughput: 0: 10373.0. Samples: 277526370. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:03,977][24592] Avg episode reward: [(0, '4.415')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:03,995][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000257588_2110160896.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:04,076][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000256366_2100150272.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:04,944][626795] Updated weights for policy 0, policy_version 257592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:06,963][626795] Updated weights for policy 0, policy_version 257602 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:08,785][626795] Updated weights for policy 0, policy_version 257612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:08,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41506.2, 300 sec: 41709.8). Total num frames: 2110357504. Throughput: 0: 10354.3. Samples: 277588398. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:08,976][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:10,844][626795] Updated weights for policy 0, policy_version 257622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:12,768][626795] Updated weights for policy 0, policy_version 257632 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:13,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.5, 300 sec: 41709.7). Total num frames: 2110570496. Throughput: 0: 10341.0. Samples: 277619088. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:13,977][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:14,883][626795] Updated weights for policy 0, policy_version 257642 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:16,811][626795] Updated weights for policy 0, policy_version 257652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:18,776][626795] Updated weights for policy 0, policy_version 257662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:18,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41369.6, 300 sec: 41709.8). Total num frames: 2110775296. Throughput: 0: 10311.5. Samples: 277680834. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:18,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:20,627][626795] Updated weights for policy 0, policy_version 257672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:22,605][626795] Updated weights for policy 0, policy_version 257682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:23,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41506.1, 300 sec: 41710.6). Total num frames: 2110988288. Throughput: 0: 10329.7. Samples: 277744464. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:23,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:24,597][626795] Updated weights for policy 0, policy_version 257692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:26,636][626795] Updated weights for policy 0, policy_version 257702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:28,504][626795] Updated weights for policy 0, policy_version 257712 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:28,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41505.9, 300 sec: 41682.0). Total num frames: 2111193088. Throughput: 0: 10333.8. Samples: 277775526. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:28,979][24592] Avg episode reward: [(0, '5.013')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:30,528][626795] Updated weights for policy 0, policy_version 257722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:32,316][626795] Updated weights for policy 0, policy_version 257732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.6, 300 sec: 41654.2). Total num frames: 2111397888. Throughput: 0: 10400.5. Samples: 277838670. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:33,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:34,420][626795] Updated weights for policy 0, policy_version 257742 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:36,449][626795] Updated weights for policy 0, policy_version 257752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:38,280][626795] Updated weights for policy 0, policy_version 257762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:38,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41369.8, 300 sec: 41709.8). Total num frames: 2111610880. Throughput: 0: 10408.5. Samples: 277900674. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:38,976][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:40,317][626795] Updated weights for policy 0, policy_version 257772 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:43,975][24592] Fps is (10 sec: 34406.5, 60 sec: 40141.0, 300 sec: 41432.1). Total num frames: 2111741952. Throughput: 0: 10408.4. Samples: 277931580. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:43,976][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:44,628][626795] Updated weights for policy 0, policy_version 257782 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:46,470][626795] Updated weights for policy 0, policy_version 257792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:48,424][626795] Updated weights for policy 0, policy_version 257802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:48,975][24592] Fps is (10 sec: 31948.9, 60 sec: 40140.8, 300 sec: 41376.5). Total num frames: 2111930368. Throughput: 0: 9872.0. Samples: 277970610. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:48,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:50,400][626795] Updated weights for policy 0, policy_version 257812 (0.0034)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:52,406][626795] Updated weights for policy 0, policy_version 257822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:53,976][24592] Fps is (10 sec: 39321.5, 60 sec: 40004.3, 300 sec: 41348.8). Total num frames: 2112135168. Throughput: 0: 9902.8. Samples: 278034024. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:53,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:54,268][626795] Updated weights for policy 0, policy_version 257832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:56,288][626795] Updated weights for policy 0, policy_version 257842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:33:58,181][626795] Updated weights for policy 0, policy_version 257852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:58,975][24592] Fps is (10 sec: 42598.5, 60 sec: 40140.8, 300 sec: 41376.6). Total num frames: 2112356352. Throughput: 0: 9910.7. Samples: 278065068. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:33:58,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:00,198][626795] Updated weights for policy 0, policy_version 257862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:02,156][626795] Updated weights for policy 0, policy_version 257872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:03,975][24592] Fps is (10 sec: 42598.0, 60 sec: 40140.8, 300 sec: 41376.5). Total num frames: 2112561152. Throughput: 0: 9933.6. Samples: 278127846. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:03,976][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:04,167][626795] Updated weights for policy 0, policy_version 257882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:06,030][626795] Updated weights for policy 0, policy_version 257892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:07,974][626795] Updated weights for policy 0, policy_version 257902 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:08,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40140.7, 300 sec: 41376.6). Total num frames: 2112765952. Throughput: 0: 9920.2. Samples: 278190876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:08,978][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:09,987][626795] Updated weights for policy 0, policy_version 257912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:12,054][626795] Updated weights for policy 0, policy_version 257922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:13,873][626795] Updated weights for policy 0, policy_version 257932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40140.9, 300 sec: 41404.3). Total num frames: 2112978944. Throughput: 0: 9905.9. Samples: 278221290. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:13,977][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:15,925][626795] Updated weights for policy 0, policy_version 257942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:17,903][626795] Updated weights for policy 0, policy_version 257952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:18,975][24592] Fps is (10 sec: 41779.7, 60 sec: 40140.8, 300 sec: 41377.4). Total num frames: 2113183744. Throughput: 0: 9900.8. Samples: 278284206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:18,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:19,892][626795] Updated weights for policy 0, policy_version 257962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:21,803][626795] Updated weights for policy 0, policy_version 257972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:23,736][626795] Updated weights for policy 0, policy_version 257982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:23,976][24592] Fps is (10 sec: 41777.8, 60 sec: 40140.5, 300 sec: 41376.5). Total num frames: 2113396736. Throughput: 0: 9913.6. Samples: 278346792. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:23,978][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:25,817][626795] Updated weights for policy 0, policy_version 257992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:27,658][626795] Updated weights for policy 0, policy_version 258002 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:28,976][24592] Fps is (10 sec: 41778.1, 60 sec: 40140.9, 300 sec: 41376.5). Total num frames: 2113601536. Throughput: 0: 9918.9. Samples: 278377932. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:28,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:29,681][626795] Updated weights for policy 0, policy_version 258012 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:31,595][626795] Updated weights for policy 0, policy_version 258022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:33,499][626795] Updated weights for policy 0, policy_version 258032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:33,975][24592] Fps is (10 sec: 41781.1, 60 sec: 40277.4, 300 sec: 41376.6). Total num frames: 2113814528. Throughput: 0: 10458.5. Samples: 278441244. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:33,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:35,426][626795] Updated weights for policy 0, policy_version 258042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:37,436][626795] Updated weights for policy 0, policy_version 258052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:38,975][24592] Fps is (10 sec: 41780.4, 60 sec: 40140.8, 300 sec: 41376.5). Total num frames: 2114019328. Throughput: 0: 10443.1. Samples: 278503962. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:38,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:39,311][626795] Updated weights for policy 0, policy_version 258062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:41,353][626795] Updated weights for policy 0, policy_version 258072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:43,308][626795] Updated weights for policy 0, policy_version 258082 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:43,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41506.1, 300 sec: 41376.5). Total num frames: 2114232320. Throughput: 0: 10445.6. Samples: 278535120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:43,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:45,318][626795] Updated weights for policy 0, policy_version 258092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:47,291][626795] Updated weights for policy 0, policy_version 258102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:48,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41779.2, 300 sec: 41376.5). Total num frames: 2114437120. Throughput: 0: 10437.5. Samples: 278597532. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:48,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:49,313][626795] Updated weights for policy 0, policy_version 258112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:51,219][626795] Updated weights for policy 0, policy_version 258122 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:53,164][626795] Updated weights for policy 0, policy_version 258132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:53,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.6, 300 sec: 41404.3). Total num frames: 2114650112. Throughput: 0: 10425.9. Samples: 278660040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:53,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:55,165][626795] Updated weights for policy 0, policy_version 258142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:57,161][626795] Updated weights for policy 0, policy_version 258152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:58,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41642.4, 300 sec: 41376.5). Total num frames: 2114854912. Throughput: 0: 10431.9. Samples: 278690730. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:34:58,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:34:59,053][626795] Updated weights for policy 0, policy_version 258162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:01,095][626795] Updated weights for policy 0, policy_version 258172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:01,118][626772] Signal inference workers to stop experience collection... (3700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:01,127][626772] Signal inference workers to resume experience collection... (3700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:01,137][626795] InferenceWorker_p0-w0: stopping experience collection (3700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:01,140][626795] InferenceWorker_p0-w0: resuming experience collection (3700 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:02,916][626795] Updated weights for policy 0, policy_version 258182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:03,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41642.7, 300 sec: 41376.5). Total num frames: 2115059712. Throughput: 0: 10440.7. Samples: 278754036. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:03,977][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:04,048][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000258187_2115067904.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000256976_2105147392.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:04,957][626795] Updated weights for policy 0, policy_version 258192 (0.0044)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:06,887][626795] Updated weights for policy 0, policy_version 258202 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:08,858][626795] Updated weights for policy 0, policy_version 258212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:08,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41779.3, 300 sec: 41376.6). Total num frames: 2115272704. Throughput: 0: 10440.1. Samples: 278816592. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:08,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:10,746][626795] Updated weights for policy 0, policy_version 258222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:12,757][626795] Updated weights for policy 0, policy_version 258232 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:13,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41779.2, 300 sec: 41404.3). Total num frames: 2115485696. Throughput: 0: 10445.5. Samples: 278847978. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:13,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:14,675][626795] Updated weights for policy 0, policy_version 258242 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:16,708][626795] Updated weights for policy 0, policy_version 258252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:18,586][626795] Updated weights for policy 0, policy_version 258262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:18,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.1, 300 sec: 41404.3). Total num frames: 2115690496. Throughput: 0: 10444.1. Samples: 278911230. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:18,976][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:20,662][626795] Updated weights for policy 0, policy_version 258272 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:22,573][626795] Updated weights for policy 0, policy_version 258282 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:23,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41779.3, 300 sec: 41432.1). Total num frames: 2115903488. Throughput: 0: 10437.9. Samples: 278973672. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:23,977][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:24,576][626795] Updated weights for policy 0, policy_version 258292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:26,445][626795] Updated weights for policy 0, policy_version 258302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:28,474][626795] Updated weights for policy 0, policy_version 258312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:28,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41779.4, 300 sec: 41404.3). Total num frames: 2116108288. Throughput: 0: 10439.3. Samples: 279004890. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:28,976][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:30,346][626795] Updated weights for policy 0, policy_version 258322 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:32,355][626795] Updated weights for policy 0, policy_version 258332 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:33,976][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.0, 300 sec: 41432.1). Total num frames: 2116321280. Throughput: 0: 10449.1. Samples: 279067746. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:33,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:34,248][626795] Updated weights for policy 0, policy_version 258342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:36,255][626795] Updated weights for policy 0, policy_version 258352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:38,193][626795] Updated weights for policy 0, policy_version 258362 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:38,976][24592] Fps is (10 sec: 42596.2, 60 sec: 41915.4, 300 sec: 41432.0). Total num frames: 2116534272. Throughput: 0: 10467.9. Samples: 279131100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:38,978][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:40,212][626795] Updated weights for policy 0, policy_version 258372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:42,179][626795] Updated weights for policy 0, policy_version 258382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:43,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41779.0, 300 sec: 41404.3). Total num frames: 2116739072. Throughput: 0: 10470.7. Samples: 279161910. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:43,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:44,068][626795] Updated weights for policy 0, policy_version 258392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:46,127][626795] Updated weights for policy 0, policy_version 258402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:48,130][626795] Updated weights for policy 0, policy_version 258412 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:48,975][24592] Fps is (10 sec: 40962.0, 60 sec: 41779.2, 300 sec: 41404.3). Total num frames: 2116943872. Throughput: 0: 10432.1. Samples: 279223482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:48,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:50,192][626795] Updated weights for policy 0, policy_version 258422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:52,143][626795] Updated weights for policy 0, policy_version 258432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:53,975][24592] Fps is (10 sec: 40961.2, 60 sec: 41642.8, 300 sec: 41376.6). Total num frames: 2117148672. Throughput: 0: 10413.1. Samples: 279285180. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:53,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:54,273][626795] Updated weights for policy 0, policy_version 258442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:56,090][626795] Updated weights for policy 0, policy_version 258452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:35:58,132][626795] Updated weights for policy 0, policy_version 258462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41643.0, 300 sec: 41376.6). Total num frames: 2117353472. Throughput: 0: 10391.1. Samples: 279315576. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:35:58,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:00,058][626795] Updated weights for policy 0, policy_version 258472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:02,164][626795] Updated weights for policy 0, policy_version 258482 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:03,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41348.8). Total num frames: 2117558272. Throughput: 0: 10346.7. Samples: 279376830. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:03,976][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:04,155][626795] Updated weights for policy 0, policy_version 258492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:06,191][626795] Updated weights for policy 0, policy_version 258502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:08,085][626795] Updated weights for policy 0, policy_version 258512 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:08,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2117763072. Throughput: 0: 10341.7. Samples: 279439044. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:08,976][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:10,094][626795] Updated weights for policy 0, policy_version 258522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:12,084][626795] Updated weights for policy 0, policy_version 258532 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.7, 300 sec: 41321.0). Total num frames: 2117967872. Throughput: 0: 10337.2. Samples: 279470064. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:13,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:14,019][626795] Updated weights for policy 0, policy_version 258542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:16,002][626795] Updated weights for policy 0, policy_version 258552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:18,006][626795] Updated weights for policy 0, policy_version 258562 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:18,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41369.6, 300 sec: 41321.1). Total num frames: 2118172672. Throughput: 0: 10326.4. Samples: 279532434. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:18,976][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:20,017][626795] Updated weights for policy 0, policy_version 258572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:22,014][626795] Updated weights for policy 0, policy_version 258582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:23,905][626795] Updated weights for policy 0, policy_version 258592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.9, 300 sec: 41321.0). Total num frames: 2118385664. Throughput: 0: 10287.5. Samples: 279594030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:23,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:25,959][626795] Updated weights for policy 0, policy_version 258602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:27,959][626795] Updated weights for policy 0, policy_version 258612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:28,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41369.4, 300 sec: 41321.0). Total num frames: 2118590464. Throughput: 0: 10288.7. Samples: 279624900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:28,978][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:29,979][626795] Updated weights for policy 0, policy_version 258622 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:31,830][626795] Updated weights for policy 0, policy_version 258632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:33,866][626795] Updated weights for policy 0, policy_version 258642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:33,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.2, 300 sec: 41293.2). Total num frames: 2118795264. Throughput: 0: 10285.3. Samples: 279686322. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:33,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:35,710][626795] Updated weights for policy 0, policy_version 258652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:37,767][626795] Updated weights for policy 0, policy_version 258662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:38,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.2, 300 sec: 41321.0). Total num frames: 2119008256. Throughput: 0: 10330.3. Samples: 279750048. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:38,979][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:39,798][626795] Updated weights for policy 0, policy_version 258672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:41,719][626795] Updated weights for policy 0, policy_version 258682 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:43,676][626795] Updated weights for policy 0, policy_version 258692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.2, 300 sec: 41293.2). Total num frames: 2119213056. Throughput: 0: 10353.7. Samples: 279781494. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:43,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:45,726][626795] Updated weights for policy 0, policy_version 258702 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:47,610][626795] Updated weights for policy 0, policy_version 258712 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:48,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2119417856. Throughput: 0: 10360.9. Samples: 279843072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:48,977][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:49,597][626795] Updated weights for policy 0, policy_version 258722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:51,554][626795] Updated weights for policy 0, policy_version 258732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:53,671][626795] Updated weights for policy 0, policy_version 258742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:53,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2119622656. Throughput: 0: 10352.1. Samples: 279904890. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:53,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:55,704][626795] Updated weights for policy 0, policy_version 258752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:57,696][626795] Updated weights for policy 0, policy_version 258762 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:58,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2119827456. Throughput: 0: 10331.0. Samples: 279934962. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:36:58,978][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:36:59,617][626795] Updated weights for policy 0, policy_version 258772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:01,683][626795] Updated weights for policy 0, policy_version 258782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:03,521][626795] Updated weights for policy 0, policy_version 258792 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:03,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2120032256. Throughput: 0: 10320.5. Samples: 279996858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:03,977][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000258793_2120032256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:04,072][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000257588_2110160896.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:05,577][626795] Updated weights for policy 0, policy_version 258802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:07,571][626795] Updated weights for policy 0, policy_version 258812 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:08,976][24592] Fps is (10 sec: 41776.2, 60 sec: 41369.1, 300 sec: 41265.4). Total num frames: 2120245248. Throughput: 0: 10327.9. Samples: 280058796. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:08,979][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:09,523][626795] Updated weights for policy 0, policy_version 258822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:11,610][626795] Updated weights for policy 0, policy_version 258832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:13,480][626795] Updated weights for policy 0, policy_version 258842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2120450048. Throughput: 0: 10339.6. Samples: 280090182. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:13,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:15,396][626795] Updated weights for policy 0, policy_version 258852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:17,400][626795] Updated weights for policy 0, policy_version 258862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:18,975][24592] Fps is (10 sec: 40963.0, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2120654848. Throughput: 0: 10369.2. Samples: 280152936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:18,976][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:19,490][626795] Updated weights for policy 0, policy_version 258872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:21,378][626795] Updated weights for policy 0, policy_version 258882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:23,360][626795] Updated weights for policy 0, policy_version 258892 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.5, 300 sec: 41237.7). Total num frames: 2120867840. Throughput: 0: 10326.7. Samples: 280214748. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:23,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:25,350][626795] Updated weights for policy 0, policy_version 258902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:27,362][626795] Updated weights for policy 0, policy_version 258912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:28,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2121072640. Throughput: 0: 10311.2. Samples: 280245498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:28,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:29,268][626795] Updated weights for policy 0, policy_version 258922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:31,365][626795] Updated weights for policy 0, policy_version 258932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:33,161][626795] Updated weights for policy 0, policy_version 258942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:33,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41369.4, 300 sec: 41182.2). Total num frames: 2121277440. Throughput: 0: 10313.0. Samples: 280307160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:33,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:35,387][626795] Updated weights for policy 0, policy_version 258952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:37,440][626795] Updated weights for policy 0, policy_version 258962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:38,975][24592] Fps is (10 sec: 40141.3, 60 sec: 41096.7, 300 sec: 41154.4). Total num frames: 2121474048. Throughput: 0: 10275.6. Samples: 280367292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:38,979][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:39,699][626795] Updated weights for policy 0, policy_version 258972 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:41,973][626795] Updated weights for policy 0, policy_version 258982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:43,975][24592] Fps is (10 sec: 37684.6, 60 sec: 40687.0, 300 sec: 41126.6). Total num frames: 2121654272. Throughput: 0: 10183.5. Samples: 280393218. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:43,977][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:44,134][626795] Updated weights for policy 0, policy_version 258992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:46,073][626795] Updated weights for policy 0, policy_version 259002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:48,320][626795] Updated weights for policy 0, policy_version 259012 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:48,976][24592] Fps is (10 sec: 37681.9, 60 sec: 40550.1, 300 sec: 41071.0). Total num frames: 2121850880. Throughput: 0: 10120.3. Samples: 280452276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:48,981][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:50,335][626795] Updated weights for policy 0, policy_version 259022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:52,482][626795] Updated weights for policy 0, policy_version 259032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:53,976][24592] Fps is (10 sec: 39318.9, 60 sec: 40413.4, 300 sec: 41015.4). Total num frames: 2122047488. Throughput: 0: 10063.1. Samples: 280511634. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:53,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:54,353][626795] Updated weights for policy 0, policy_version 259042 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:56,383][626795] Updated weights for policy 0, policy_version 259052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:37:58,341][626795] Updated weights for policy 0, policy_version 259062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:58,975][24592] Fps is (10 sec: 40142.5, 60 sec: 40413.9, 300 sec: 41015.6). Total num frames: 2122252288. Throughput: 0: 10031.6. Samples: 280541604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:37:58,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:00,353][626795] Updated weights for policy 0, policy_version 259072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:02,258][626795] Updated weights for policy 0, policy_version 259082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:03,975][24592] Fps is (10 sec: 41782.1, 60 sec: 40550.4, 300 sec: 41043.3). Total num frames: 2122465280. Throughput: 0: 10030.7. Samples: 280604316. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:03,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:04,299][626795] Updated weights for policy 0, policy_version 259092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:06,350][626795] Updated weights for policy 0, policy_version 259102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:08,058][626795] Updated weights for policy 0, policy_version 259112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 40550.9, 300 sec: 41043.3). Total num frames: 2122678272. Throughput: 0: 10041.0. Samples: 280666590. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:08,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:10,196][626795] Updated weights for policy 0, policy_version 259122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:12,189][626795] Updated weights for policy 0, policy_version 259132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:13,976][24592] Fps is (10 sec: 41775.0, 60 sec: 40549.7, 300 sec: 41043.2). Total num frames: 2122883072. Throughput: 0: 10048.1. Samples: 280697670. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:13,978][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:14,065][626795] Updated weights for policy 0, policy_version 259142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:16,085][626795] Updated weights for policy 0, policy_version 259152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:18,013][626795] Updated weights for policy 0, policy_version 259162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:18,976][24592] Fps is (10 sec: 41778.8, 60 sec: 40686.9, 300 sec: 41043.3). Total num frames: 2123096064. Throughput: 0: 10088.5. Samples: 280761138. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:18,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:19,969][626795] Updated weights for policy 0, policy_version 259172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:21,982][626795] Updated weights for policy 0, policy_version 259182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:23,946][626795] Updated weights for policy 0, policy_version 259192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:23,976][24592] Fps is (10 sec: 41781.8, 60 sec: 40550.2, 300 sec: 41043.3). Total num frames: 2123300864. Throughput: 0: 10129.5. Samples: 280823124. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:23,978][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:25,911][626795] Updated weights for policy 0, policy_version 259202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:27,900][626795] Updated weights for policy 0, policy_version 259212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:28,976][24592] Fps is (10 sec: 40959.8, 60 sec: 40550.4, 300 sec: 41043.3). Total num frames: 2123505664. Throughput: 0: 10254.8. Samples: 280854684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:28,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:29,856][626795] Updated weights for policy 0, policy_version 259222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:31,790][626795] Updated weights for policy 0, policy_version 259232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:33,812][626795] Updated weights for policy 0, policy_version 259242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:33,975][24592] Fps is (10 sec: 40961.7, 60 sec: 40550.7, 300 sec: 41015.5). Total num frames: 2123710464. Throughput: 0: 10311.6. Samples: 280916292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:33,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:35,742][626795] Updated weights for policy 0, policy_version 259252 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:37,739][626795] Updated weights for policy 0, policy_version 259262 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:38,976][24592] Fps is (10 sec: 41779.3, 60 sec: 40823.4, 300 sec: 41293.2). Total num frames: 2123923456. Throughput: 0: 10390.9. Samples: 280979220. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:38,978][24592] Avg episode reward: [(0, '4.943')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:39,734][626795] Updated weights for policy 0, policy_version 259272 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:41,615][626795] Updated weights for policy 0, policy_version 259282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:43,533][626795] Updated weights for policy 0, policy_version 259292 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:43,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.0, 300 sec: 41348.8). Total num frames: 2124128256. Throughput: 0: 10424.5. Samples: 281010708. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:43,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:45,597][626795] Updated weights for policy 0, policy_version 259302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:47,607][626795] Updated weights for policy 0, policy_version 259312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:48,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41506.4, 300 sec: 41376.5). Total num frames: 2124341248. Throughput: 0: 10407.3. Samples: 281072646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:48,978][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:49,590][626795] Updated weights for policy 0, policy_version 259322 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:51,523][626795] Updated weights for policy 0, policy_version 259332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:53,442][626795] Updated weights for policy 0, policy_version 259342 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:53,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41643.0, 300 sec: 41321.0). Total num frames: 2124546048. Throughput: 0: 10419.8. Samples: 281135484. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:53,977][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:55,456][626795] Updated weights for policy 0, policy_version 259352 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:57,407][626795] Updated weights for policy 0, policy_version 259362 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:58,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41642.6, 300 sec: 41321.0). Total num frames: 2124750848. Throughput: 0: 10405.4. Samples: 281165904. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:38:58,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:38:59,433][626795] Updated weights for policy 0, policy_version 259372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:01,458][626795] Updated weights for policy 0, policy_version 259382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:03,400][626795] Updated weights for policy 0, policy_version 259392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:03,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41642.3, 300 sec: 41348.7). Total num frames: 2124963840. Throughput: 0: 10377.6. Samples: 281228136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:03,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000259395_2124963840.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:04,096][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000258187_2115067904.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:05,360][626795] Updated weights for policy 0, policy_version 259402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:07,436][626795] Updated weights for policy 0, policy_version 259412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2125160448. Throughput: 0: 10361.0. Samples: 281289366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:08,977][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:09,371][626795] Updated weights for policy 0, policy_version 259422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:11,349][626795] Updated weights for policy 0, policy_version 259432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:13,385][626795] Updated weights for policy 0, policy_version 259442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:13,975][24592] Fps is (10 sec: 40962.0, 60 sec: 41506.8, 300 sec: 41321.0). Total num frames: 2125373440. Throughput: 0: 10347.2. Samples: 281320308. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:13,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:15,319][626795] Updated weights for policy 0, policy_version 259452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:17,369][626795] Updated weights for policy 0, policy_version 259462 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41293.3). Total num frames: 2125578240. Throughput: 0: 10365.6. Samples: 281382744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:18,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:19,196][626795] Updated weights for policy 0, policy_version 259472 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:21,272][626795] Updated weights for policy 0, policy_version 259482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:23,181][626795] Updated weights for policy 0, policy_version 259492 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:23,977][24592] Fps is (10 sec: 40952.1, 60 sec: 41368.5, 300 sec: 41293.0). Total num frames: 2125783040. Throughput: 0: 10339.6. Samples: 281444520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:23,979][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:25,191][626795] Updated weights for policy 0, policy_version 259502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:27,111][626795] Updated weights for policy 0, policy_version 259512 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.2, 300 sec: 41293.2). Total num frames: 2125996032. Throughput: 0: 10343.4. Samples: 281476158. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:28,976][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:29,083][626795] Updated weights for policy 0, policy_version 259522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:31,108][626795] Updated weights for policy 0, policy_version 259532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:33,104][626795] Updated weights for policy 0, policy_version 259542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:33,976][24592] Fps is (10 sec: 41785.2, 60 sec: 41505.8, 300 sec: 41293.2). Total num frames: 2126200832. Throughput: 0: 10342.5. Samples: 281538066. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:33,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:35,182][626795] Updated weights for policy 0, policy_version 259552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:37,153][626795] Updated weights for policy 0, policy_version 259562 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:38,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.7, 300 sec: 41265.5). Total num frames: 2126405632. Throughput: 0: 10324.4. Samples: 281600082. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:38,977][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:39,026][626795] Updated weights for policy 0, policy_version 259572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:41,010][626795] Updated weights for policy 0, policy_version 259582 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:43,052][626795] Updated weights for policy 0, policy_version 259592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:43,975][24592] Fps is (10 sec: 41781.3, 60 sec: 41506.2, 300 sec: 41293.2). Total num frames: 2126618624. Throughput: 0: 10338.7. Samples: 281631144. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:43,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:44,946][626795] Updated weights for policy 0, policy_version 259602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:46,978][626795] Updated weights for policy 0, policy_version 259612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:48,885][626795] Updated weights for policy 0, policy_version 259622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:48,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41369.3, 300 sec: 41265.4). Total num frames: 2126823424. Throughput: 0: 10333.4. Samples: 281693136. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:48,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:50,844][626795] Updated weights for policy 0, policy_version 259632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:52,694][626795] Updated weights for policy 0, policy_version 259642 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:53,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41506.2, 300 sec: 41293.3). Total num frames: 2127036416. Throughput: 0: 10386.5. Samples: 281756760. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:53,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:54,789][626795] Updated weights for policy 0, policy_version 259652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:56,716][626795] Updated weights for policy 0, policy_version 259662 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:39:58,709][626795] Updated weights for policy 0, policy_version 259672 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:58,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41642.7, 300 sec: 41321.0). Total num frames: 2127249408. Throughput: 0: 10380.3. Samples: 281787420. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:39:58,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:00,648][626795] Updated weights for policy 0, policy_version 259682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:02,660][626795] Updated weights for policy 0, policy_version 259692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:03,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.5, 300 sec: 41293.2). Total num frames: 2127454208. Throughput: 0: 10395.7. Samples: 281850552. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:03,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:04,630][626795] Updated weights for policy 0, policy_version 259702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:06,524][626795] Updated weights for policy 0, policy_version 259712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:08,522][626795] Updated weights for policy 0, policy_version 259722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41265.5). Total num frames: 2127659008. Throughput: 0: 10398.6. Samples: 281912436. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:08,976][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:10,535][626795] Updated weights for policy 0, policy_version 259732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:12,421][626795] Updated weights for policy 0, policy_version 259742 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41506.1, 300 sec: 41265.5). Total num frames: 2127863808. Throughput: 0: 10386.4. Samples: 281943546. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:13,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:14,545][626795] Updated weights for policy 0, policy_version 259752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:16,494][626795] Updated weights for policy 0, policy_version 259762 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:18,409][626795] Updated weights for policy 0, policy_version 259772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.7, 300 sec: 41265.5). Total num frames: 2128076800. Throughput: 0: 10400.6. Samples: 282006090. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:18,979][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:20,434][626795] Updated weights for policy 0, policy_version 259782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:22,389][626795] Updated weights for policy 0, policy_version 259792 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41644.0, 300 sec: 41265.5). Total num frames: 2128281600. Throughput: 0: 10409.5. Samples: 282068508. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:23,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:24,308][626795] Updated weights for policy 0, policy_version 259802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:26,312][626795] Updated weights for policy 0, policy_version 259812 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:28,264][626795] Updated weights for policy 0, policy_version 259822 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:28,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.1, 300 sec: 41237.7). Total num frames: 2128486400. Throughput: 0: 10411.9. Samples: 282099678. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:28,976][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:30,073][626795] Updated weights for policy 0, policy_version 259832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:32,143][626795] Updated weights for policy 0, policy_version 259842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:33,903][626795] Updated weights for policy 0, policy_version 259852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:33,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41779.6, 300 sec: 41265.5). Total num frames: 2128707584. Throughput: 0: 10444.6. Samples: 282163140. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:33,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:35,953][626795] Updated weights for policy 0, policy_version 259862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:37,905][626795] Updated weights for policy 0, policy_version 259872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:38,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41779.2, 300 sec: 41265.5). Total num frames: 2128912384. Throughput: 0: 10438.5. Samples: 282226494. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:38,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:39,910][626795] Updated weights for policy 0, policy_version 259882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:41,840][626795] Updated weights for policy 0, policy_version 259892 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:43,975][24592] Fps is (10 sec: 40140.8, 60 sec: 41506.2, 300 sec: 41237.7). Total num frames: 2129108992. Throughput: 0: 10452.8. Samples: 282257796. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:43,977][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:43,977][626795] Updated weights for policy 0, policy_version 259902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:45,791][626795] Updated weights for policy 0, policy_version 259912 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:47,816][626795] Updated weights for policy 0, policy_version 259922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41779.5, 300 sec: 41293.2). Total num frames: 2129330176. Throughput: 0: 10422.9. Samples: 282319584. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:48,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:49,776][626795] Updated weights for policy 0, policy_version 259932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:51,864][626795] Updated weights for policy 0, policy_version 259942 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:53,678][626795] Updated weights for policy 0, policy_version 259952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:53,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41642.7, 300 sec: 41293.2). Total num frames: 2129534976. Throughput: 0: 10434.9. Samples: 282382008. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:53,976][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:55,783][626795] Updated weights for policy 0, policy_version 259962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:57,618][626795] Updated weights for policy 0, policy_version 259972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:58,977][24592] Fps is (10 sec: 40952.1, 60 sec: 41504.8, 300 sec: 41293.0). Total num frames: 2129739776. Throughput: 0: 10420.0. Samples: 282412464. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:40:58,979][24592] Avg episode reward: [(0, '4.958')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:40:59,723][626795] Updated weights for policy 0, policy_version 259982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:01,568][626795] Updated weights for policy 0, policy_version 259992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:03,613][626795] Updated weights for policy 0, policy_version 260002 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:03,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41642.7, 300 sec: 41321.0). Total num frames: 2129952768. Throughput: 0: 10432.2. Samples: 282475536. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:03,976][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000260004_2129952768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:04,061][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000258793_2120032256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:05,518][626795] Updated weights for policy 0, policy_version 260012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:07,513][626795] Updated weights for policy 0, policy_version 260022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:08,975][24592] Fps is (10 sec: 41787.2, 60 sec: 41642.7, 300 sec: 41321.0). Total num frames: 2130157568. Throughput: 0: 10418.8. Samples: 282537354. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:08,976][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:09,417][626795] Updated weights for policy 0, policy_version 260032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:11,523][626795] Updated weights for policy 0, policy_version 260042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:13,510][626795] Updated weights for policy 0, policy_version 260052 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:13,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41642.6, 300 sec: 41321.0). Total num frames: 2130362368. Throughput: 0: 10406.8. Samples: 282567984. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:13,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:15,499][626795] Updated weights for policy 0, policy_version 260062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:17,374][626795] Updated weights for policy 0, policy_version 260072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:18,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.2, 300 sec: 41293.2). Total num frames: 2130567168. Throughput: 0: 10385.3. Samples: 282630480. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:18,978][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:19,424][626795] Updated weights for policy 0, policy_version 260082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:21,396][626795] Updated weights for policy 0, policy_version 260092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:23,328][626795] Updated weights for policy 0, policy_version 260102 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:23,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41642.7, 300 sec: 41321.0). Total num frames: 2130780160. Throughput: 0: 10353.7. Samples: 282692412. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:23,976][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:25,325][626795] Updated weights for policy 0, policy_version 260112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:27,357][626795] Updated weights for policy 0, policy_version 260122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:28,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41642.4, 300 sec: 41321.0). Total num frames: 2130984960. Throughput: 0: 10346.0. Samples: 282723372. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:28,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:29,216][626795] Updated weights for policy 0, policy_version 260132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:31,215][626795] Updated weights for policy 0, policy_version 260142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:33,243][626795] Updated weights for policy 0, policy_version 260152 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.6, 300 sec: 41293.3). Total num frames: 2131189760. Throughput: 0: 10358.4. Samples: 282785712. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:33,976][24592] Avg episode reward: [(0, '4.922')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:35,217][626795] Updated weights for policy 0, policy_version 260162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:37,187][626795] Updated weights for policy 0, policy_version 260172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:38,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41506.2, 300 sec: 41321.0). Total num frames: 2131402752. Throughput: 0: 10373.8. Samples: 282848826. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:38,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:39,228][626795] Updated weights for policy 0, policy_version 260182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:41,031][626772] Signal inference workers to stop experience collection... (3750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:41,032][626772] Signal inference workers to resume experience collection... (3750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:41,041][626795] InferenceWorker_p0-w0: stopping experience collection (3750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:41,041][626795] InferenceWorker_p0-w0: resuming experience collection (3750 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:41,062][626795] Updated weights for policy 0, policy_version 260192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:43,222][626795] Updated weights for policy 0, policy_version 260202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:43,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41642.5, 300 sec: 41321.0). Total num frames: 2131607552. Throughput: 0: 10370.0. Samples: 282879096. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:43,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:45,169][626795] Updated weights for policy 0, policy_version 260212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:47,211][626795] Updated weights for policy 0, policy_version 260222 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:48,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41369.3, 300 sec: 41321.0). Total num frames: 2131812352. Throughput: 0: 10327.1. Samples: 282940260. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:48,977][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:49,129][626795] Updated weights for policy 0, policy_version 260232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:51,046][626795] Updated weights for policy 0, policy_version 260242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:53,125][626795] Updated weights for policy 0, policy_version 260252 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:53,991][24592] Fps is (10 sec: 40896.4, 60 sec: 41358.8, 300 sec: 41318.8). Total num frames: 2132017152. Throughput: 0: 10336.7. Samples: 283002666. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:53,992][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:55,080][626795] Updated weights for policy 0, policy_version 260262 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:57,075][626795] Updated weights for policy 0, policy_version 260272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:58,975][24592] Fps is (10 sec: 40961.7, 60 sec: 41370.9, 300 sec: 41321.0). Total num frames: 2132221952. Throughput: 0: 10341.5. Samples: 283033350. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:41:58,976][24592] Avg episode reward: [(0, '5.014')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:41:58,990][626795] Updated weights for policy 0, policy_version 260282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:01,008][626795] Updated weights for policy 0, policy_version 260292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:02,903][626795] Updated weights for policy 0, policy_version 260302 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:03,975][24592] Fps is (10 sec: 41845.0, 60 sec: 41369.6, 300 sec: 41321.1). Total num frames: 2132434944. Throughput: 0: 10345.7. Samples: 283096038. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:03,977][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:04,892][626795] Updated weights for policy 0, policy_version 260312 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:06,897][626795] Updated weights for policy 0, policy_version 260322 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:08,807][626795] Updated weights for policy 0, policy_version 260332 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:08,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.5, 300 sec: 41321.0). Total num frames: 2132639744. Throughput: 0: 10359.8. Samples: 283158606. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:08,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:10,797][626795] Updated weights for policy 0, policy_version 260342 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:12,783][626795] Updated weights for policy 0, policy_version 260352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:13,976][24592] Fps is (10 sec: 40955.6, 60 sec: 41368.9, 300 sec: 41320.9). Total num frames: 2132844544. Throughput: 0: 10363.5. Samples: 283189734. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:13,978][24592] Avg episode reward: [(0, '4.929')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:14,813][626795] Updated weights for policy 0, policy_version 260362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:16,730][626795] Updated weights for policy 0, policy_version 260372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:18,760][626795] Updated weights for policy 0, policy_version 260382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.0, 300 sec: 41321.0). Total num frames: 2133057536. Throughput: 0: 10352.8. Samples: 283251588. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:18,976][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:20,770][626795] Updated weights for policy 0, policy_version 260392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:22,673][626795] Updated weights for policy 0, policy_version 260402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:23,975][24592] Fps is (10 sec: 41783.6, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2133262336. Throughput: 0: 10333.7. Samples: 283313844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:23,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:24,697][626795] Updated weights for policy 0, policy_version 260412 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:26,811][626795] Updated weights for policy 0, policy_version 260422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:28,614][626795] Updated weights for policy 0, policy_version 260432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:28,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.9, 300 sec: 41321.1). Total num frames: 2133467136. Throughput: 0: 10341.0. Samples: 283344438. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:28,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:30,739][626795] Updated weights for policy 0, policy_version 260442 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:32,564][626795] Updated weights for policy 0, policy_version 260452 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:33,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2133671936. Throughput: 0: 10365.4. Samples: 283406700. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:33,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:34,625][626795] Updated weights for policy 0, policy_version 260462 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:36,581][626795] Updated weights for policy 0, policy_version 260472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:38,556][626795] Updated weights for policy 0, policy_version 260482 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.6, 300 sec: 41459.8). Total num frames: 2133884928. Throughput: 0: 10369.9. Samples: 283469148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:38,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:40,467][626795] Updated weights for policy 0, policy_version 260492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:42,464][626795] Updated weights for policy 0, policy_version 260502 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:43,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41369.7, 300 sec: 41487.7). Total num frames: 2134089728. Throughput: 0: 10378.0. Samples: 283500360. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:43,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:44,390][626795] Updated weights for policy 0, policy_version 260512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:46,505][626795] Updated weights for policy 0, policy_version 260522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:48,394][626795] Updated weights for policy 0, policy_version 260532 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.4, 300 sec: 41543.3). Total num frames: 2134302720. Throughput: 0: 10351.2. Samples: 283561842. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:48,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:50,463][626795] Updated weights for policy 0, policy_version 260542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:52,436][626795] Updated weights for policy 0, policy_version 260552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:53,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41380.2, 300 sec: 41515.3). Total num frames: 2134499328. Throughput: 0: 10340.8. Samples: 283623942. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:53,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:54,412][626795] Updated weights for policy 0, policy_version 260562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:56,471][626795] Updated weights for policy 0, policy_version 260572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:42:58,270][626795] Updated weights for policy 0, policy_version 260582 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 2134712320. Throughput: 0: 10335.4. Samples: 283654818. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:42:58,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:00,309][626795] Updated weights for policy 0, policy_version 260592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:02,349][626795] Updated weights for policy 0, policy_version 260602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:03,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41506.2, 300 sec: 41515.4). Total num frames: 2134925312. Throughput: 0: 10341.5. Samples: 283716954. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:03,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000260611_2134925312.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:04,053][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000259395_2124963840.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:04,390][626795] Updated weights for policy 0, policy_version 260612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:06,329][626795] Updated weights for policy 0, policy_version 260622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:08,319][626795] Updated weights for policy 0, policy_version 260632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:08,976][24592] Fps is (10 sec: 40958.7, 60 sec: 41369.4, 300 sec: 41487.7). Total num frames: 2135121920. Throughput: 0: 10326.1. Samples: 283778520. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:08,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:10,249][626795] Updated weights for policy 0, policy_version 260642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:12,199][626795] Updated weights for policy 0, policy_version 260652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:13,976][24592] Fps is (10 sec: 40136.9, 60 sec: 41369.7, 300 sec: 41459.7). Total num frames: 2135326720. Throughput: 0: 10333.8. Samples: 283809468. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:13,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:14,202][626795] Updated weights for policy 0, policy_version 260662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:16,138][626795] Updated weights for policy 0, policy_version 260672 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:18,146][626795] Updated weights for policy 0, policy_version 260682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:18,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41369.6, 300 sec: 41487.7). Total num frames: 2135539712. Throughput: 0: 10347.2. Samples: 283872324. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:18,977][24592] Avg episode reward: [(0, '4.904')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:20,144][626795] Updated weights for policy 0, policy_version 260692 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:22,088][626795] Updated weights for policy 0, policy_version 260702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:23,976][24592] Fps is (10 sec: 41782.3, 60 sec: 41369.5, 300 sec: 41487.6). Total num frames: 2135744512. Throughput: 0: 10336.6. Samples: 283934298. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:23,976][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:24,167][626795] Updated weights for policy 0, policy_version 260712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:26,074][626795] Updated weights for policy 0, policy_version 260722 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:28,072][626795] Updated weights for policy 0, policy_version 260732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2135949312. Throughput: 0: 10322.5. Samples: 283964874. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:28,986][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:30,129][626795] Updated weights for policy 0, policy_version 260742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:32,085][626795] Updated weights for policy 0, policy_version 260752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:33,909][626795] Updated weights for policy 0, policy_version 260762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2136162304. Throughput: 0: 10339.2. Samples: 284027106. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:33,978][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:35,959][626795] Updated weights for policy 0, policy_version 260772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:37,925][626795] Updated weights for policy 0, policy_version 260782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2136367104. Throughput: 0: 10360.9. Samples: 284090178. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:38,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:39,883][626795] Updated weights for policy 0, policy_version 260792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:41,810][626795] Updated weights for policy 0, policy_version 260802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:43,804][626795] Updated weights for policy 0, policy_version 260812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:43,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2136580096. Throughput: 0: 10371.9. Samples: 284121552. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:43,976][24592] Avg episode reward: [(0, '5.025')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:45,771][626795] Updated weights for policy 0, policy_version 260822 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:47,738][626795] Updated weights for policy 0, policy_version 260832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.5, 300 sec: 41487.6). Total num frames: 2136784896. Throughput: 0: 10381.6. Samples: 284184126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:48,977][24592] Avg episode reward: [(0, '4.925')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:49,666][626795] Updated weights for policy 0, policy_version 260842 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:51,793][626795] Updated weights for policy 0, policy_version 260852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:53,639][626795] Updated weights for policy 0, policy_version 260862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:53,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.4, 300 sec: 41487.6). Total num frames: 2136989696. Throughput: 0: 10374.6. Samples: 284245374. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:53,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:55,801][626795] Updated weights for policy 0, policy_version 260872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:57,763][626795] Updated weights for policy 0, policy_version 260882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:58,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41369.4, 300 sec: 41459.9). Total num frames: 2137194496. Throughput: 0: 10364.1. Samples: 284275848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:43:58,977][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:43:59,769][626795] Updated weights for policy 0, policy_version 260892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:01,759][626795] Updated weights for policy 0, policy_version 260902 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:03,620][626795] Updated weights for policy 0, policy_version 260912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:03,976][24592] Fps is (10 sec: 40958.0, 60 sec: 41232.7, 300 sec: 41487.6). Total num frames: 2137399296. Throughput: 0: 10335.4. Samples: 284337420. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:03,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:05,719][626795] Updated weights for policy 0, policy_version 260922 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:07,664][626795] Updated weights for policy 0, policy_version 260932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:08,976][24592] Fps is (10 sec: 40960.5, 60 sec: 41369.7, 300 sec: 41459.8). Total num frames: 2137604096. Throughput: 0: 10353.2. Samples: 284400192. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:08,976][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:09,711][626795] Updated weights for policy 0, policy_version 260942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:11,606][626795] Updated weights for policy 0, policy_version 260952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:13,580][626795] Updated weights for policy 0, policy_version 260962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:13,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41506.7, 300 sec: 41487.6). Total num frames: 2137817088. Throughput: 0: 10350.0. Samples: 284430624. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:13,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:15,502][626795] Updated weights for policy 0, policy_version 260972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:17,498][626795] Updated weights for policy 0, policy_version 260982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:18,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41369.6, 300 sec: 41487.9). Total num frames: 2138021888. Throughput: 0: 10364.6. Samples: 284493510. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:18,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:19,493][626795] Updated weights for policy 0, policy_version 260992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:21,487][626795] Updated weights for policy 0, policy_version 261002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:23,433][626795] Updated weights for policy 0, policy_version 261012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:23,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.7, 300 sec: 41459.8). Total num frames: 2138226688. Throughput: 0: 10343.7. Samples: 284555646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:23,977][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:25,484][626795] Updated weights for policy 0, policy_version 261022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:27,414][626795] Updated weights for policy 0, policy_version 261032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:28,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41506.0, 300 sec: 41487.7). Total num frames: 2138439680. Throughput: 0: 10319.6. Samples: 284585934. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:28,979][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:29,519][626795] Updated weights for policy 0, policy_version 261042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:31,411][626795] Updated weights for policy 0, policy_version 261052 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:33,409][626795] Updated weights for policy 0, policy_version 261062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.7, 300 sec: 41487.6). Total num frames: 2138644480. Throughput: 0: 10317.2. Samples: 284648400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:33,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:35,357][626795] Updated weights for policy 0, policy_version 261072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:37,319][626795] Updated weights for policy 0, policy_version 261082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:38,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41369.6, 300 sec: 41459.8). Total num frames: 2138849280. Throughput: 0: 10335.4. Samples: 284710470. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:38,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:39,322][626795] Updated weights for policy 0, policy_version 261092 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:41,351][626795] Updated weights for policy 0, policy_version 261102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:43,175][626795] Updated weights for policy 0, policy_version 261112 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:43,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.6, 300 sec: 41487.7). Total num frames: 2139062272. Throughput: 0: 10343.7. Samples: 284741310. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:43,978][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:45,208][626795] Updated weights for policy 0, policy_version 261122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:47,283][626795] Updated weights for policy 0, policy_version 261132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2139267072. Throughput: 0: 10365.7. Samples: 284803872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:48,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:49,123][626795] Updated weights for policy 0, policy_version 261142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:51,151][626795] Updated weights for policy 0, policy_version 261152 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:53,028][626795] Updated weights for policy 0, policy_version 261162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:53,993][24592] Fps is (10 sec: 40890.2, 60 sec: 41357.8, 300 sec: 41429.7). Total num frames: 2139471872. Throughput: 0: 10360.0. Samples: 284866566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:53,993][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:55,074][626795] Updated weights for policy 0, policy_version 261172 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:57,099][626795] Updated weights for policy 0, policy_version 261182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:58,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.8, 300 sec: 41432.1). Total num frames: 2139676672. Throughput: 0: 10366.9. Samples: 284897136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:44:58,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:44:59,081][626795] Updated weights for policy 0, policy_version 261192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:01,052][626795] Updated weights for policy 0, policy_version 261202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:02,962][626795] Updated weights for policy 0, policy_version 261212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:03,976][24592] Fps is (10 sec: 41029.3, 60 sec: 41369.8, 300 sec: 41432.0). Total num frames: 2139881472. Throughput: 0: 10346.2. Samples: 284959092. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:03,977][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000261216_2139881472.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:04,068][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000260004_2129952768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:05,094][626795] Updated weights for policy 0, policy_version 261222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:07,145][626795] Updated weights for policy 0, policy_version 261232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:08,962][626795] Updated weights for policy 0, policy_version 261242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:08,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41506.2, 300 sec: 41459.8). Total num frames: 2140094464. Throughput: 0: 10343.2. Samples: 285021090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:08,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:10,884][626795] Updated weights for policy 0, policy_version 261252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:12,932][626795] Updated weights for policy 0, policy_version 261262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:13,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2140299264. Throughput: 0: 10355.1. Samples: 285051912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:13,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:14,800][626795] Updated weights for policy 0, policy_version 261272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:16,878][626795] Updated weights for policy 0, policy_version 261282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:18,692][626795] Updated weights for policy 0, policy_version 261292 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:18,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2140504064. Throughput: 0: 10368.0. Samples: 285114960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:18,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:20,768][626795] Updated weights for policy 0, policy_version 261302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:22,610][626795] Updated weights for policy 0, policy_version 261312 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:23,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41506.0, 300 sec: 41459.8). Total num frames: 2140717056. Throughput: 0: 10387.3. Samples: 285177900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:23,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:24,691][626795] Updated weights for policy 0, policy_version 261322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:26,672][626795] Updated weights for policy 0, policy_version 261332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:28,636][626795] Updated weights for policy 0, policy_version 261342 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:28,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2140921856. Throughput: 0: 10378.9. Samples: 285208362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:28,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:30,785][626795] Updated weights for policy 0, policy_version 261352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:32,621][626795] Updated weights for policy 0, policy_version 261362 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:33,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.5, 300 sec: 41404.3). Total num frames: 2141126656. Throughput: 0: 10347.9. Samples: 285269526. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:33,979][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:34,684][626795] Updated weights for policy 0, policy_version 261372 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:36,690][626795] Updated weights for policy 0, policy_version 261382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:38,581][626795] Updated weights for policy 0, policy_version 261392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:38,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2141331456. Throughput: 0: 10348.1. Samples: 285332052. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:38,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:40,647][626795] Updated weights for policy 0, policy_version 261402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:42,462][626795] Updated weights for policy 0, policy_version 261412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:43,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2141544448. Throughput: 0: 10353.1. Samples: 285363024. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:43,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:44,554][626795] Updated weights for policy 0, policy_version 261422 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:46,435][626795] Updated weights for policy 0, policy_version 261432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:48,529][626795] Updated weights for policy 0, policy_version 261442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2141749248. Throughput: 0: 10355.9. Samples: 285425106. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:48,976][24592] Avg episode reward: [(0, '4.943')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:50,489][626795] Updated weights for policy 0, policy_version 261452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:52,518][626795] Updated weights for policy 0, policy_version 261462 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:53,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41381.4, 300 sec: 41404.6). Total num frames: 2141954048. Throughput: 0: 10358.7. Samples: 285487230. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:53,977][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:54,486][626795] Updated weights for policy 0, policy_version 261472 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:56,405][626795] Updated weights for policy 0, policy_version 261482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:45:58,374][626795] Updated weights for policy 0, policy_version 261492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:58,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41369.5, 300 sec: 41376.5). Total num frames: 2142158848. Throughput: 0: 10370.8. Samples: 285518598. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:45:58,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:00,466][626795] Updated weights for policy 0, policy_version 261502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:02,409][626795] Updated weights for policy 0, policy_version 261512 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:03,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.3, 300 sec: 41404.3). Total num frames: 2142371840. Throughput: 0: 10329.1. Samples: 285579768. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:03,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:04,448][626795] Updated weights for policy 0, policy_version 261522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:06,325][626795] Updated weights for policy 0, policy_version 261532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:08,293][626795] Updated weights for policy 0, policy_version 261542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:08,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2142576640. Throughput: 0: 10315.9. Samples: 285642114. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:08,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:10,278][626795] Updated weights for policy 0, policy_version 261552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:12,260][626795] Updated weights for policy 0, policy_version 261562 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41369.6, 300 sec: 41404.3). Total num frames: 2142781440. Throughput: 0: 10328.8. Samples: 285673158. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:13,976][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:14,319][626795] Updated weights for policy 0, policy_version 261572 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:16,270][626795] Updated weights for policy 0, policy_version 261582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:18,204][626795] Updated weights for policy 0, policy_version 261592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:18,977][24592] Fps is (10 sec: 40954.1, 60 sec: 41368.6, 300 sec: 41376.3). Total num frames: 2142986240. Throughput: 0: 10355.6. Samples: 285735540. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:18,979][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:20,147][626795] Updated weights for policy 0, policy_version 261602 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:22,140][626795] Updated weights for policy 0, policy_version 261612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.7, 300 sec: 41404.4). Total num frames: 2143199232. Throughput: 0: 10364.1. Samples: 285798438. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:23,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:24,110][626795] Updated weights for policy 0, policy_version 261622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:25,979][626795] Updated weights for policy 0, policy_version 261632 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:27,990][626795] Updated weights for policy 0, policy_version 261642 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:28,976][24592] Fps is (10 sec: 42601.3, 60 sec: 41505.7, 300 sec: 41432.0). Total num frames: 2143412224. Throughput: 0: 10357.8. Samples: 285829134. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:28,977][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:29,984][626795] Updated weights for policy 0, policy_version 261652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:32,060][626795] Updated weights for policy 0, policy_version 261662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:33,953][626795] Updated weights for policy 0, policy_version 261672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:33,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.2, 300 sec: 41404.3). Total num frames: 2143617024. Throughput: 0: 10359.3. Samples: 285891276. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:33,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:35,875][626795] Updated weights for policy 0, policy_version 261682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:37,810][626795] Updated weights for policy 0, policy_version 261692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:38,975][24592] Fps is (10 sec: 40962.6, 60 sec: 41506.1, 300 sec: 41404.3). Total num frames: 2143821824. Throughput: 0: 10368.1. Samples: 285953796. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:38,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:39,869][626795] Updated weights for policy 0, policy_version 261702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:41,714][626795] Updated weights for policy 0, policy_version 261712 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:43,825][626795] Updated weights for policy 0, policy_version 261722 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:43,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41505.7, 300 sec: 41432.1). Total num frames: 2144034816. Throughput: 0: 10368.3. Samples: 285985176. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:43,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:45,750][626795] Updated weights for policy 0, policy_version 261732 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:47,744][626795] Updated weights for policy 0, policy_version 261742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:48,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41506.1, 300 sec: 41434.3). Total num frames: 2144239616. Throughput: 0: 10404.3. Samples: 286047960. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:48,977][24592] Avg episode reward: [(0, '5.018')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:49,609][626795] Updated weights for policy 0, policy_version 261752 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:51,577][626795] Updated weights for policy 0, policy_version 261762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:53,566][626795] Updated weights for policy 0, policy_version 261772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:53,976][24592] Fps is (10 sec: 40961.9, 60 sec: 41506.0, 300 sec: 41432.1). Total num frames: 2144444416. Throughput: 0: 10405.0. Samples: 286110342. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:53,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:55,706][626795] Updated weights for policy 0, policy_version 261782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:57,505][626795] Updated weights for policy 0, policy_version 261792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:58,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41642.8, 300 sec: 41432.1). Total num frames: 2144657408. Throughput: 0: 10399.3. Samples: 286141128. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:46:58,978][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:46:59,460][626795] Updated weights for policy 0, policy_version 261802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:01,468][626795] Updated weights for policy 0, policy_version 261812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:03,411][626795] Updated weights for policy 0, policy_version 261822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2144862208. Throughput: 0: 10403.8. Samples: 286203696. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:03,976][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:04,037][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000261825_2144870400.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:04,129][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000260611_2134925312.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:05,509][626795] Updated weights for policy 0, policy_version 261832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:07,520][626795] Updated weights for policy 0, policy_version 261842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:08,976][24592] Fps is (10 sec: 40959.1, 60 sec: 41506.0, 300 sec: 41432.2). Total num frames: 2145067008. Throughput: 0: 10378.1. Samples: 286265454. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:08,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:09,471][626795] Updated weights for policy 0, policy_version 261852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:11,395][626795] Updated weights for policy 0, policy_version 261862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:13,452][626795] Updated weights for policy 0, policy_version 261872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:13,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41506.1, 300 sec: 41404.3). Total num frames: 2145271808. Throughput: 0: 10390.2. Samples: 286296684. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:13,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:15,381][626795] Updated weights for policy 0, policy_version 261882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:17,352][626795] Updated weights for policy 0, policy_version 261892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:18,976][24592] Fps is (10 sec: 41779.2, 60 sec: 41643.5, 300 sec: 41432.1). Total num frames: 2145484800. Throughput: 0: 10384.0. Samples: 286358556. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:18,976][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:19,429][626795] Updated weights for policy 0, policy_version 261902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:21,296][626795] Updated weights for policy 0, policy_version 261912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:23,242][626795] Updated weights for policy 0, policy_version 261922 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:23,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2145689600. Throughput: 0: 10378.7. Samples: 286420836. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:23,976][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:25,244][626795] Updated weights for policy 0, policy_version 261932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:27,131][626795] Updated weights for policy 0, policy_version 261942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:28,975][24592] Fps is (10 sec: 40961.0, 60 sec: 41370.1, 300 sec: 41432.1). Total num frames: 2145894400. Throughput: 0: 10370.3. Samples: 286451832. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:28,977][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:29,239][626795] Updated weights for policy 0, policy_version 261952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:31,177][626795] Updated weights for policy 0, policy_version 261962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:32,962][626795] Updated weights for policy 0, policy_version 261972 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.2, 300 sec: 41432.1). Total num frames: 2146107392. Throughput: 0: 10391.7. Samples: 286515588. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:33,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:35,077][626795] Updated weights for policy 0, policy_version 261982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:37,075][626795] Updated weights for policy 0, policy_version 261992 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:38,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2146312192. Throughput: 0: 10349.1. Samples: 286576050. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:38,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:39,111][626795] Updated weights for policy 0, policy_version 262002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:41,131][626795] Updated weights for policy 0, policy_version 262012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:43,107][626795] Updated weights for policy 0, policy_version 262022 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:43,977][24592] Fps is (10 sec: 40956.0, 60 sec: 41369.3, 300 sec: 41404.2). Total num frames: 2146516992. Throughput: 0: 10339.4. Samples: 286606410. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:43,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:45,083][626795] Updated weights for policy 0, policy_version 262032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:47,107][626795] Updated weights for policy 0, policy_version 262042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:48,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2146721792. Throughput: 0: 10328.0. Samples: 286668456. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:48,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:49,023][626795] Updated weights for policy 0, policy_version 262052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:51,087][626795] Updated weights for policy 0, policy_version 262062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:53,111][626795] Updated weights for policy 0, policy_version 262072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:53,976][24592] Fps is (10 sec: 40961.3, 60 sec: 41369.2, 300 sec: 41404.2). Total num frames: 2146926592. Throughput: 0: 10322.8. Samples: 286729986. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:53,978][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:55,063][626795] Updated weights for policy 0, policy_version 262082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:57,016][626795] Updated weights for policy 0, policy_version 262092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.1, 300 sec: 41376.5). Total num frames: 2147131392. Throughput: 0: 10313.5. Samples: 286760790. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:47:58,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:47:59,020][626795] Updated weights for policy 0, policy_version 262102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:01,032][626795] Updated weights for policy 0, policy_version 262112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:03,016][626795] Updated weights for policy 0, policy_version 262122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:03,976][24592] Fps is (10 sec: 40960.0, 60 sec: 41232.6, 300 sec: 41404.3). Total num frames: 2147336192. Throughput: 0: 10319.2. Samples: 286822926. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:03,977][24592] Avg episode reward: [(0, '4.309')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:04,991][626795] Updated weights for policy 0, policy_version 262132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:06,914][626795] Updated weights for policy 0, policy_version 262142 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:08,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41233.2, 300 sec: 41404.4). Total num frames: 2147540992. Throughput: 0: 10308.5. Samples: 286884720. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:08,978][24592] Avg episode reward: [(0, '5.079')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:09,060][626795] Updated weights for policy 0, policy_version 262152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:10,948][626795] Updated weights for policy 0, policy_version 262162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:12,951][626795] Updated weights for policy 0, policy_version 262172 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:13,976][24592] Fps is (10 sec: 40960.8, 60 sec: 41232.7, 300 sec: 41376.5). Total num frames: 2147745792. Throughput: 0: 10294.0. Samples: 286915068. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:13,978][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:14,952][626795] Updated weights for policy 0, policy_version 262182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:17,022][626795] Updated weights for policy 0, policy_version 262192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:18,847][626795] Updated weights for policy 0, policy_version 262202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.2, 300 sec: 41404.3). Total num frames: 2147958784. Throughput: 0: 10253.6. Samples: 286977000. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:18,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:20,900][626795] Updated weights for policy 0, policy_version 262212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:22,931][626795] Updated weights for policy 0, policy_version 262222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:23,976][24592] Fps is (10 sec: 41779.9, 60 sec: 41232.9, 300 sec: 41404.3). Total num frames: 2148163584. Throughput: 0: 10291.7. Samples: 287039178. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:23,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:24,948][626795] Updated weights for policy 0, policy_version 262232 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:26,900][626795] Updated weights for policy 0, policy_version 262242 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:28,916][626795] Updated weights for policy 0, policy_version 262252 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:28,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.0, 300 sec: 41376.5). Total num frames: 2148368384. Throughput: 0: 10287.3. Samples: 287069328. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:28,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:30,522][626772] Signal inference workers to stop experience collection... (3800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:30,523][626772] Signal inference workers to resume experience collection... (3800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:30,535][626795] InferenceWorker_p0-w0: stopping experience collection (3800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:30,540][626795] InferenceWorker_p0-w0: resuming experience collection (3800 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:30,773][626795] Updated weights for policy 0, policy_version 262262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:32,824][626795] Updated weights for policy 0, policy_version 262272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:33,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41233.1, 300 sec: 41404.3). Total num frames: 2148581376. Throughput: 0: 10309.7. Samples: 287132394. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:33,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:34,766][626795] Updated weights for policy 0, policy_version 262282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:36,801][626795] Updated weights for policy 0, policy_version 262292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:38,638][626795] Updated weights for policy 0, policy_version 262302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:38,976][24592] Fps is (10 sec: 40956.0, 60 sec: 41095.9, 300 sec: 41348.6). Total num frames: 2148777984. Throughput: 0: 10313.0. Samples: 287194074. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:38,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:40,810][626795] Updated weights for policy 0, policy_version 262312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:42,830][626795] Updated weights for policy 0, policy_version 262322 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:43,976][24592] Fps is (10 sec: 40140.4, 60 sec: 41097.1, 300 sec: 41348.8). Total num frames: 2148982784. Throughput: 0: 10288.5. Samples: 287223774. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:43,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:44,863][626795] Updated weights for policy 0, policy_version 262332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:46,865][626795] Updated weights for policy 0, policy_version 262342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:48,819][626795] Updated weights for policy 0, policy_version 262352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:48,975][24592] Fps is (10 sec: 40964.4, 60 sec: 41096.5, 300 sec: 41348.8). Total num frames: 2149187584. Throughput: 0: 10270.8. Samples: 287285106. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:48,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:50,783][626795] Updated weights for policy 0, policy_version 262362 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:52,793][626795] Updated weights for policy 0, policy_version 262372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:53,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41097.0, 300 sec: 41348.8). Total num frames: 2149392384. Throughput: 0: 10267.6. Samples: 287346762. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:53,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:54,770][626795] Updated weights for policy 0, policy_version 262382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:56,839][626795] Updated weights for policy 0, policy_version 262392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:48:58,876][626795] Updated weights for policy 0, policy_version 262402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:58,976][24592] Fps is (10 sec: 40956.1, 60 sec: 41095.9, 300 sec: 41348.7). Total num frames: 2149597184. Throughput: 0: 10274.8. Samples: 287377440. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:48:58,978][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:00,778][626795] Updated weights for policy 0, policy_version 262412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:02,681][626795] Updated weights for policy 0, policy_version 262422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:03,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41233.2, 300 sec: 41376.5). Total num frames: 2149810176. Throughput: 0: 10282.7. Samples: 287439726. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:03,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000262428_2149810176.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:04,068][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000261216_2139881472.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:04,812][626795] Updated weights for policy 0, policy_version 262432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:06,740][626795] Updated weights for policy 0, policy_version 262442 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:08,713][626795] Updated weights for policy 0, policy_version 262452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:08,976][24592] Fps is (10 sec: 41781.5, 60 sec: 41232.8, 300 sec: 41348.7). Total num frames: 2150014976. Throughput: 0: 10281.8. Samples: 287501862. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:08,982][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:10,748][626795] Updated weights for policy 0, policy_version 262462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:12,695][626795] Updated weights for policy 0, policy_version 262472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:13,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41233.4, 300 sec: 41348.8). Total num frames: 2150219776. Throughput: 0: 10294.1. Samples: 287532564. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:13,976][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:14,682][626795] Updated weights for policy 0, policy_version 262482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:16,742][626795] Updated weights for policy 0, policy_version 262492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:18,670][626795] Updated weights for policy 0, policy_version 262502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:18,975][24592] Fps is (10 sec: 40142.4, 60 sec: 40960.0, 300 sec: 41321.0). Total num frames: 2150416384. Throughput: 0: 10242.1. Samples: 287593290. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:18,979][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:20,862][626795] Updated weights for policy 0, policy_version 262512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:22,692][626795] Updated weights for policy 0, policy_version 262522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:23,976][24592] Fps is (10 sec: 40140.3, 60 sec: 40960.1, 300 sec: 41293.2). Total num frames: 2150621184. Throughput: 0: 10242.5. Samples: 287654976. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:23,979][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:24,804][626795] Updated weights for policy 0, policy_version 262532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:26,778][626795] Updated weights for policy 0, policy_version 262542 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:28,762][626795] Updated weights for policy 0, policy_version 262552 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:28,977][24592] Fps is (10 sec: 41775.9, 60 sec: 41096.0, 300 sec: 41320.9). Total num frames: 2150834176. Throughput: 0: 10266.0. Samples: 287685750. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:28,977][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:30,767][626795] Updated weights for policy 0, policy_version 262562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:32,779][626795] Updated weights for policy 0, policy_version 262572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:33,975][24592] Fps is (10 sec: 41780.1, 60 sec: 40960.0, 300 sec: 41321.0). Total num frames: 2151038976. Throughput: 0: 10264.0. Samples: 287746986. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:33,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:34,775][626795] Updated weights for policy 0, policy_version 262582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:36,722][626795] Updated weights for policy 0, policy_version 262592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:38,653][626795] Updated weights for policy 0, policy_version 262602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:38,976][24592] Fps is (10 sec: 40961.3, 60 sec: 41096.9, 300 sec: 41293.2). Total num frames: 2151243776. Throughput: 0: 10282.2. Samples: 287809464. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:38,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:40,756][626795] Updated weights for policy 0, policy_version 262612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:42,608][626795] Updated weights for policy 0, policy_version 262622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:43,992][24592] Fps is (10 sec: 40893.6, 60 sec: 41085.5, 300 sec: 41291.0). Total num frames: 2151448576. Throughput: 0: 10274.6. Samples: 287839956. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:43,993][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:44,750][626795] Updated weights for policy 0, policy_version 262632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:46,711][626795] Updated weights for policy 0, policy_version 262642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:48,679][626795] Updated weights for policy 0, policy_version 262652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:48,975][24592] Fps is (10 sec: 40961.9, 60 sec: 41096.5, 300 sec: 41295.6). Total num frames: 2151653376. Throughput: 0: 10254.0. Samples: 287901150. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:48,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:50,758][626795] Updated weights for policy 0, policy_version 262662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:52,699][626795] Updated weights for policy 0, policy_version 262672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:53,976][24592] Fps is (10 sec: 41025.3, 60 sec: 41096.3, 300 sec: 41293.2). Total num frames: 2151858176. Throughput: 0: 10240.3. Samples: 287962674. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:53,978][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:54,776][626795] Updated weights for policy 0, policy_version 262682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:56,731][626795] Updated weights for policy 0, policy_version 262692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:49:58,757][626795] Updated weights for policy 0, policy_version 262702 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:58,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41097.1, 300 sec: 41293.3). Total num frames: 2152062976. Throughput: 0: 10226.1. Samples: 287992740. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:49:58,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:00,695][626795] Updated weights for policy 0, policy_version 262712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:02,715][626795] Updated weights for policy 0, policy_version 262722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:03,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40960.0, 300 sec: 41265.4). Total num frames: 2152267776. Throughput: 0: 10254.4. Samples: 288054744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:03,977][24592] Avg episode reward: [(0, '4.965')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:04,756][626795] Updated weights for policy 0, policy_version 262732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:06,620][626795] Updated weights for policy 0, policy_version 262742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:08,622][626795] Updated weights for policy 0, policy_version 262752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:08,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40960.3, 300 sec: 41265.5). Total num frames: 2152472576. Throughput: 0: 10266.9. Samples: 288116982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:08,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:10,638][626795] Updated weights for policy 0, policy_version 262762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:12,542][626795] Updated weights for policy 0, policy_version 262772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:13,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41096.1, 300 sec: 41293.1). Total num frames: 2152685568. Throughput: 0: 10269.5. Samples: 288147876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:13,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:14,579][626795] Updated weights for policy 0, policy_version 262782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:16,654][626795] Updated weights for policy 0, policy_version 262792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:18,612][626795] Updated weights for policy 0, policy_version 262802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:18,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41096.3, 300 sec: 41237.7). Total num frames: 2152882176. Throughput: 0: 10291.2. Samples: 288210096. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:18,978][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:20,586][626795] Updated weights for policy 0, policy_version 262812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:22,495][626795] Updated weights for policy 0, policy_version 262822 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:23,975][24592] Fps is (10 sec: 40962.9, 60 sec: 41233.2, 300 sec: 41265.5). Total num frames: 2153095168. Throughput: 0: 10268.0. Samples: 288271518. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:23,978][24592] Avg episode reward: [(0, '4.379')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:24,576][626795] Updated weights for policy 0, policy_version 262832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:26,506][626795] Updated weights for policy 0, policy_version 262842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:28,507][626795] Updated weights for policy 0, policy_version 262852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:28,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41097.0, 300 sec: 41265.5). Total num frames: 2153299968. Throughput: 0: 10278.6. Samples: 288302328. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:28,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:30,587][626795] Updated weights for policy 0, policy_version 262862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:32,457][626795] Updated weights for policy 0, policy_version 262872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:33,976][24592] Fps is (10 sec: 40958.3, 60 sec: 41096.2, 300 sec: 41265.4). Total num frames: 2153504768. Throughput: 0: 10301.1. Samples: 288364704. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:33,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:34,507][626795] Updated weights for policy 0, policy_version 262882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:36,505][626795] Updated weights for policy 0, policy_version 262892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:38,398][626795] Updated weights for policy 0, policy_version 262902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:38,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.8, 300 sec: 41237.7). Total num frames: 2153709568. Throughput: 0: 10311.8. Samples: 288426702. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:38,978][24592] Avg episode reward: [(0, '5.000')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:40,437][626795] Updated weights for policy 0, policy_version 262912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:42,425][626795] Updated weights for policy 0, policy_version 262922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:43,975][24592] Fps is (10 sec: 40961.8, 60 sec: 41107.7, 300 sec: 41237.7). Total num frames: 2153914368. Throughput: 0: 10327.8. Samples: 288457488. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:43,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:44,336][626795] Updated weights for policy 0, policy_version 262932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:46,306][626795] Updated weights for policy 0, policy_version 262942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:48,328][626795] Updated weights for policy 0, policy_version 262952 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2154127360. Throughput: 0: 10338.6. Samples: 288519978. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:48,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:50,486][626795] Updated weights for policy 0, policy_version 262962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:52,337][626795] Updated weights for policy 0, policy_version 262972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:53,976][24592] Fps is (10 sec: 41776.8, 60 sec: 41232.9, 300 sec: 41265.4). Total num frames: 2154332160. Throughput: 0: 10306.4. Samples: 288580776. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:53,979][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:54,464][626795] Updated weights for policy 0, policy_version 262982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:56,380][626795] Updated weights for policy 0, policy_version 262992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:50:58,282][626795] Updated weights for policy 0, policy_version 263002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:58,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2154536960. Throughput: 0: 10296.5. Samples: 288611214. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:50:58,976][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:00,371][626795] Updated weights for policy 0, policy_version 263012 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:02,300][626795] Updated weights for policy 0, policy_version 263022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:03,977][24592] Fps is (10 sec: 40956.2, 60 sec: 41232.4, 300 sec: 41237.5). Total num frames: 2154741760. Throughput: 0: 10284.4. Samples: 288672906. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:03,979][24592] Avg episode reward: [(0, '4.347')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000263030_2154741760.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:04,113][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000261825_2144870400.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:04,359][626795] Updated weights for policy 0, policy_version 263032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:06,282][626795] Updated weights for policy 0, policy_version 263042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:08,239][626795] Updated weights for policy 0, policy_version 263052 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:08,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2154946560. Throughput: 0: 10309.5. Samples: 288735444. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:08,976][24592] Avg episode reward: [(0, '4.331')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:10,239][626795] Updated weights for policy 0, policy_version 263062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:12,323][626795] Updated weights for policy 0, policy_version 263072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:13,975][24592] Fps is (10 sec: 40965.8, 60 sec: 41097.0, 300 sec: 41237.9). Total num frames: 2155151360. Throughput: 0: 10283.6. Samples: 288765090. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:13,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:14,316][626795] Updated weights for policy 0, policy_version 263082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:16,258][626795] Updated weights for policy 0, policy_version 263092 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:18,198][626795] Updated weights for policy 0, policy_version 263102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:18,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41233.2, 300 sec: 41209.9). Total num frames: 2155356160. Throughput: 0: 10282.7. Samples: 288827424. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:18,977][24592] Avg episode reward: [(0, '4.987')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:20,262][626795] Updated weights for policy 0, policy_version 263112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:22,302][626795] Updated weights for policy 0, policy_version 263122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:23,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41096.2, 300 sec: 41182.2). Total num frames: 2155560960. Throughput: 0: 10268.2. Samples: 288888774. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:23,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:24,291][626795] Updated weights for policy 0, policy_version 263132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:26,398][626795] Updated weights for policy 0, policy_version 263142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:28,196][626795] Updated weights for policy 0, policy_version 263152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:28,977][24592] Fps is (10 sec: 40956.3, 60 sec: 41095.9, 300 sec: 41182.0). Total num frames: 2155765760. Throughput: 0: 10257.6. Samples: 288919092. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:28,981][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:30,197][626795] Updated weights for policy 0, policy_version 263162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:32,128][626795] Updated weights for policy 0, policy_version 263172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:33,975][24592] Fps is (10 sec: 40961.8, 60 sec: 41096.8, 300 sec: 41182.2). Total num frames: 2155970560. Throughput: 0: 10257.2. Samples: 288981552. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:33,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:34,235][626795] Updated weights for policy 0, policy_version 263182 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:36,147][626795] Updated weights for policy 0, policy_version 263192 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:38,225][626795] Updated weights for policy 0, policy_version 263202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:38,975][24592] Fps is (10 sec: 41783.6, 60 sec: 41233.1, 300 sec: 41182.2). Total num frames: 2156183552. Throughput: 0: 10286.9. Samples: 289043682. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:38,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:40,120][626795] Updated weights for policy 0, policy_version 263212 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:42,067][626795] Updated weights for policy 0, policy_version 263222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:43,976][24592] Fps is (10 sec: 42597.6, 60 sec: 41369.4, 300 sec: 41209.9). Total num frames: 2156396544. Throughput: 0: 10301.2. Samples: 289074768. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:43,979][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:43,980][626795] Updated weights for policy 0, policy_version 263232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:46,002][626795] Updated weights for policy 0, policy_version 263242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:47,948][626795] Updated weights for policy 0, policy_version 263252 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2156601344. Throughput: 0: 10314.1. Samples: 289137024. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:48,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:49,969][626795] Updated weights for policy 0, policy_version 263262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:51,953][626795] Updated weights for policy 0, policy_version 263272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:53,955][626795] Updated weights for policy 0, policy_version 263282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:53,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41233.4, 300 sec: 41182.1). Total num frames: 2156806144. Throughput: 0: 10311.6. Samples: 289199466. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:53,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:56,037][626795] Updated weights for policy 0, policy_version 263292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:57,966][626795] Updated weights for policy 0, policy_version 263302 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:58,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41096.6, 300 sec: 41154.4). Total num frames: 2157002752. Throughput: 0: 10319.7. Samples: 289229478. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:51:58,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:51:59,876][626795] Updated weights for policy 0, policy_version 263312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:02,001][626795] Updated weights for policy 0, policy_version 263322 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:03,868][626795] Updated weights for policy 0, policy_version 263332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:03,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41234.1, 300 sec: 41182.2). Total num frames: 2157215744. Throughput: 0: 10306.4. Samples: 289291212. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:03,976][24592] Avg episode reward: [(0, '5.029')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:05,769][626795] Updated weights for policy 0, policy_version 263342 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:07,806][626795] Updated weights for policy 0, policy_version 263352 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:08,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2157428736. Throughput: 0: 10352.5. Samples: 289354632. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:08,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:09,833][626795] Updated weights for policy 0, policy_version 263362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:11,751][626795] Updated weights for policy 0, policy_version 263372 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:13,713][626795] Updated weights for policy 0, policy_version 263382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:13,979][24592] Fps is (10 sec: 41762.8, 60 sec: 41366.9, 300 sec: 41181.6). Total num frames: 2157633536. Throughput: 0: 10369.2. Samples: 289385736. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:13,981][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:15,713][626795] Updated weights for policy 0, policy_version 263392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:17,616][626795] Updated weights for policy 0, policy_version 263402 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:18,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.7, 300 sec: 41182.2). Total num frames: 2157838336. Throughput: 0: 10370.8. Samples: 289448238. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:18,976][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:19,652][626795] Updated weights for policy 0, policy_version 263412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:21,561][626795] Updated weights for policy 0, policy_version 263422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:23,453][626795] Updated weights for policy 0, policy_version 263432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:23,975][24592] Fps is (10 sec: 41795.8, 60 sec: 41506.5, 300 sec: 41209.9). Total num frames: 2158051328. Throughput: 0: 10388.4. Samples: 289511160. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:23,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:25,634][626795] Updated weights for policy 0, policy_version 263442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:27,621][626795] Updated weights for policy 0, policy_version 263452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:28,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41370.4, 300 sec: 41154.4). Total num frames: 2158247936. Throughput: 0: 10353.7. Samples: 289540680. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:28,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:29,616][626795] Updated weights for policy 0, policy_version 263462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:31,575][626795] Updated weights for policy 0, policy_version 263472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:33,498][626795] Updated weights for policy 0, policy_version 263482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41506.1, 300 sec: 41182.2). Total num frames: 2158460928. Throughput: 0: 10352.0. Samples: 289602864. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:33,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:35,442][626795] Updated weights for policy 0, policy_version 263492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:37,502][626795] Updated weights for policy 0, policy_version 263502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:38,976][24592] Fps is (10 sec: 41777.6, 60 sec: 41369.4, 300 sec: 41182.3). Total num frames: 2158665728. Throughput: 0: 10342.2. Samples: 289664868. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:38,979][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:39,494][626795] Updated weights for policy 0, policy_version 263512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:41,466][626795] Updated weights for policy 0, policy_version 263522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:43,458][626795] Updated weights for policy 0, policy_version 263532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:43,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.2, 300 sec: 41182.2). Total num frames: 2158870528. Throughput: 0: 10360.1. Samples: 289695684. Policy #0 lag: (min: 0.0, avg: 1.9, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:43,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:45,405][626795] Updated weights for policy 0, policy_version 263542 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:47,474][626795] Updated weights for policy 0, policy_version 263552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:48,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41232.9, 300 sec: 41182.2). Total num frames: 2159075328. Throughput: 0: 10376.5. Samples: 289758156. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:48,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:49,267][626795] Updated weights for policy 0, policy_version 263562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:51,307][626795] Updated weights for policy 0, policy_version 263572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:53,335][626795] Updated weights for policy 0, policy_version 263582 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:53,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.7, 300 sec: 41209.9). Total num frames: 2159288320. Throughput: 0: 10346.7. Samples: 289820232. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:53,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:55,368][626795] Updated weights for policy 0, policy_version 263592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:57,262][626795] Updated weights for policy 0, policy_version 263602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:58,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41506.1, 300 sec: 41210.0). Total num frames: 2159493120. Throughput: 0: 10334.4. Samples: 289850742. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:52:58,977][24592] Avg episode reward: [(0, '4.917')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:52:59,337][626795] Updated weights for policy 0, policy_version 263612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:01,351][626795] Updated weights for policy 0, policy_version 263622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:03,254][626795] Updated weights for policy 0, policy_version 263632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:03,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2159697920. Throughput: 0: 10307.9. Samples: 289912092. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:03,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000263636_2159706112.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:04,085][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000262428_2149810176.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:05,423][626795] Updated weights for policy 0, policy_version 263642 (0.0042)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:07,325][626795] Updated weights for policy 0, policy_version 263652 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:08,976][24592] Fps is (10 sec: 40958.9, 60 sec: 41232.9, 300 sec: 41209.9). Total num frames: 2159902720. Throughput: 0: 10280.7. Samples: 289973796. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:08,978][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:09,262][626795] Updated weights for policy 0, policy_version 263662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:11,292][626795] Updated weights for policy 0, policy_version 263672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:13,236][626795] Updated weights for policy 0, policy_version 263682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:13,976][24592] Fps is (10 sec: 40959.7, 60 sec: 41235.7, 300 sec: 41182.2). Total num frames: 2160107520. Throughput: 0: 10317.3. Samples: 290004960. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:13,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:15,311][626795] Updated weights for policy 0, policy_version 263692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:17,262][626795] Updated weights for policy 0, policy_version 263702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:18,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41233.0, 300 sec: 41182.2). Total num frames: 2160312320. Throughput: 0: 10306.1. Samples: 290066640. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:18,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:19,289][626795] Updated weights for policy 0, policy_version 263712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:21,182][626795] Updated weights for policy 0, policy_version 263722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:23,119][626795] Updated weights for policy 0, policy_version 263732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:23,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41232.8, 300 sec: 41209.9). Total num frames: 2160525312. Throughput: 0: 10327.3. Samples: 290129598. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:23,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:25,107][626795] Updated weights for policy 0, policy_version 263742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:27,061][626795] Updated weights for policy 0, policy_version 263752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:28,919][626795] Updated weights for policy 0, policy_version 263762 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:28,976][24592] Fps is (10 sec: 42595.1, 60 sec: 41505.5, 300 sec: 41209.8). Total num frames: 2160738304. Throughput: 0: 10333.8. Samples: 290160714. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:28,977][24592] Avg episode reward: [(0, '4.976')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:30,971][626795] Updated weights for policy 0, policy_version 263772 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:32,923][626795] Updated weights for policy 0, policy_version 263782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:33,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41369.6, 300 sec: 41237.8). Total num frames: 2160943104. Throughput: 0: 10329.9. Samples: 290223000. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:33,976][24592] Avg episode reward: [(0, '4.927')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:35,029][626795] Updated weights for policy 0, policy_version 263792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:37,022][626795] Updated weights for policy 0, policy_version 263802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:38,923][626795] Updated weights for policy 0, policy_version 263812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:38,975][24592] Fps is (10 sec: 40963.2, 60 sec: 41369.8, 300 sec: 41237.7). Total num frames: 2161147904. Throughput: 0: 10326.4. Samples: 290284920. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:38,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:40,889][626795] Updated weights for policy 0, policy_version 263822 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:42,880][626795] Updated weights for policy 0, policy_version 263832 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:43,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.6, 300 sec: 41237.7). Total num frames: 2161352704. Throughput: 0: 10337.7. Samples: 290315940. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:43,977][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:44,807][626795] Updated weights for policy 0, policy_version 263842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:46,786][626795] Updated weights for policy 0, policy_version 263852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:48,744][626795] Updated weights for policy 0, policy_version 263862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:48,979][24592] Fps is (10 sec: 41764.8, 60 sec: 41503.8, 300 sec: 41265.0). Total num frames: 2161565696. Throughput: 0: 10371.4. Samples: 290378844. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:48,983][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:50,858][626795] Updated weights for policy 0, policy_version 263872 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:52,694][626795] Updated weights for policy 0, policy_version 263882 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41265.6). Total num frames: 2161770496. Throughput: 0: 10387.0. Samples: 290441208. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:53,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:54,615][626795] Updated weights for policy 0, policy_version 263892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:56,687][626795] Updated weights for policy 0, policy_version 263902 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,242][626772] Signal inference workers to stop experience collection... (3850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,243][626772] Signal inference workers to resume experience collection... (3850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,250][626795] InferenceWorker_p0-w0: stopping experience collection (3850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,259][626795] InferenceWorker_p0-w0: resuming experience collection (3850 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:53:58,553][626795] Updated weights for policy 0, policy_version 263912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,976][24592] Fps is (10 sec: 40973.5, 60 sec: 41369.5, 300 sec: 41237.7). Total num frames: 2161975296. Throughput: 0: 10384.5. Samples: 290472264. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:53:58,981][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:00,580][626795] Updated weights for policy 0, policy_version 263922 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:02,618][626795] Updated weights for policy 0, policy_version 263932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:03,976][24592] Fps is (10 sec: 41776.6, 60 sec: 41505.7, 300 sec: 41265.4). Total num frames: 2162188288. Throughput: 0: 10394.4. Samples: 290534394. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:03,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:04,657][626795] Updated weights for policy 0, policy_version 263942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:06,515][626795] Updated weights for policy 0, policy_version 263952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:08,605][626795] Updated weights for policy 0, policy_version 263962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:08,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41506.4, 300 sec: 41265.5). Total num frames: 2162393088. Throughput: 0: 10364.9. Samples: 290596014. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:08,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:10,557][626795] Updated weights for policy 0, policy_version 263972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:12,592][626795] Updated weights for policy 0, policy_version 263982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:13,976][24592] Fps is (10 sec: 40960.6, 60 sec: 41505.9, 300 sec: 41293.2). Total num frames: 2162597888. Throughput: 0: 10355.7. Samples: 290626716. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:13,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:14,528][626795] Updated weights for policy 0, policy_version 263992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:16,542][626795] Updated weights for policy 0, policy_version 264002 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:18,421][626795] Updated weights for policy 0, policy_version 264012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:18,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41506.2, 300 sec: 41293.3). Total num frames: 2162802688. Throughput: 0: 10343.2. Samples: 290688444. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:18,976][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:20,542][626795] Updated weights for policy 0, policy_version 264022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:22,347][626795] Updated weights for policy 0, policy_version 264032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:23,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.0, 300 sec: 41293.3). Total num frames: 2163015680. Throughput: 0: 10371.5. Samples: 290751642. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:23,978][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:24,464][626795] Updated weights for policy 0, policy_version 264042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:26,309][626795] Updated weights for policy 0, policy_version 264052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:28,321][626795] Updated weights for policy 0, policy_version 264062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:28,976][24592] Fps is (10 sec: 41777.1, 60 sec: 41369.9, 300 sec: 41293.2). Total num frames: 2163220480. Throughput: 0: 10362.2. Samples: 290782242. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:28,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:30,197][626795] Updated weights for policy 0, policy_version 264072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:32,274][626795] Updated weights for policy 0, policy_version 264082 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:33,976][24592] Fps is (10 sec: 41779.3, 60 sec: 41505.8, 300 sec: 41321.0). Total num frames: 2163433472. Throughput: 0: 10358.8. Samples: 290844960. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:33,978][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:34,183][626795] Updated weights for policy 0, policy_version 264092 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:36,219][626795] Updated weights for policy 0, policy_version 264102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:38,200][626795] Updated weights for policy 0, policy_version 264112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:38,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41369.3, 300 sec: 41295.4). Total num frames: 2163630080. Throughput: 0: 10345.2. Samples: 290906748. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:38,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:40,149][626795] Updated weights for policy 0, policy_version 264122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:42,151][626795] Updated weights for policy 0, policy_version 264132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:43,976][24592] Fps is (10 sec: 40142.1, 60 sec: 41369.5, 300 sec: 41293.2). Total num frames: 2163834880. Throughput: 0: 10329.9. Samples: 290937108. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:43,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:44,299][626795] Updated weights for policy 0, policy_version 264142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:46,222][626795] Updated weights for policy 0, policy_version 264152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:48,082][626795] Updated weights for policy 0, policy_version 264162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:48,975][24592] Fps is (10 sec: 41781.7, 60 sec: 41372.1, 300 sec: 41321.0). Total num frames: 2164047872. Throughput: 0: 10342.8. Samples: 290999814. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:48,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:50,098][626795] Updated weights for policy 0, policy_version 264172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:52,107][626795] Updated weights for policy 0, policy_version 264182 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:53,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2164252672. Throughput: 0: 10356.0. Samples: 291062034. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:53,977][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:54,005][626795] Updated weights for policy 0, policy_version 264192 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:56,051][626795] Updated weights for policy 0, policy_version 264202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:58,001][626795] Updated weights for policy 0, policy_version 264212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:58,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2164465664. Throughput: 0: 10359.4. Samples: 291092886. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:54:58,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:54:59,898][626795] Updated weights for policy 0, policy_version 264222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:01,845][626795] Updated weights for policy 0, policy_version 264232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:03,891][626795] Updated weights for policy 0, policy_version 264242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:03,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41370.0, 300 sec: 41348.8). Total num frames: 2164670464. Throughput: 0: 10379.2. Samples: 291155508. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:03,978][24592] Avg episode reward: [(0, '4.893')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:04,015][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000264243_2164678656.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:04,140][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000263030_2154741760.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:05,844][626795] Updated weights for policy 0, policy_version 264252 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:07,839][626795] Updated weights for policy 0, policy_version 264262 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:08,976][24592] Fps is (10 sec: 40960.5, 60 sec: 41369.4, 300 sec: 41321.1). Total num frames: 2164875264. Throughput: 0: 10342.3. Samples: 291217044. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:08,978][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:09,931][626795] Updated weights for policy 0, policy_version 264272 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:11,960][626795] Updated weights for policy 0, policy_version 264282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:13,905][626795] Updated weights for policy 0, policy_version 264292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.9, 300 sec: 41348.8). Total num frames: 2165080064. Throughput: 0: 10342.5. Samples: 291247650. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:13,979][24592] Avg episode reward: [(0, '5.006')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:15,811][626795] Updated weights for policy 0, policy_version 264302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:17,872][626795] Updated weights for policy 0, policy_version 264312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:18,976][24592] Fps is (10 sec: 40957.4, 60 sec: 41369.0, 300 sec: 41320.9). Total num frames: 2165284864. Throughput: 0: 10328.3. Samples: 291309738. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:18,979][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:19,864][626795] Updated weights for policy 0, policy_version 264322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:21,733][626795] Updated weights for policy 0, policy_version 264332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:23,796][626795] Updated weights for policy 0, policy_version 264342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:23,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41370.0, 300 sec: 41348.8). Total num frames: 2165497856. Throughput: 0: 10352.3. Samples: 291372594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:23,976][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:25,589][626795] Updated weights for policy 0, policy_version 264352 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:27,616][626795] Updated weights for policy 0, policy_version 264362 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:28,975][24592] Fps is (10 sec: 41782.8, 60 sec: 41369.9, 300 sec: 41348.8). Total num frames: 2165702656. Throughput: 0: 10374.6. Samples: 291403962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:28,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:29,547][626795] Updated weights for policy 0, policy_version 264372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:31,556][626795] Updated weights for policy 0, policy_version 264382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:33,402][626795] Updated weights for policy 0, policy_version 264392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:33,975][24592] Fps is (10 sec: 41778.4, 60 sec: 41369.9, 300 sec: 41376.5). Total num frames: 2165915648. Throughput: 0: 10379.0. Samples: 291466872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:33,979][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:35,440][626795] Updated weights for policy 0, policy_version 264402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:37,453][626795] Updated weights for policy 0, policy_version 264412 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:38,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41643.0, 300 sec: 41404.3). Total num frames: 2166128640. Throughput: 0: 10391.6. Samples: 291529656. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:38,976][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:39,388][626795] Updated weights for policy 0, policy_version 264422 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:41,385][626795] Updated weights for policy 0, policy_version 264432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:43,409][626795] Updated weights for policy 0, policy_version 264442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:43,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.2, 300 sec: 41348.8). Total num frames: 2166325248. Throughput: 0: 10376.7. Samples: 291559836. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:43,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:45,450][626795] Updated weights for policy 0, policy_version 264452 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:47,376][626795] Updated weights for policy 0, policy_version 264462 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:48,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2166530048. Throughput: 0: 10360.9. Samples: 291621750. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:48,978][24592] Avg episode reward: [(0, '4.325')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:49,371][626795] Updated weights for policy 0, policy_version 264472 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:51,422][626795] Updated weights for policy 0, policy_version 264482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:53,415][626795] Updated weights for policy 0, policy_version 264492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:53,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2166734848. Throughput: 0: 10352.3. Samples: 291682896. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:53,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:55,389][626795] Updated weights for policy 0, policy_version 264502 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:57,289][626795] Updated weights for policy 0, policy_version 264512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:58,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41233.0, 300 sec: 41348.9). Total num frames: 2166939648. Throughput: 0: 10359.9. Samples: 291713850. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:55:58,978][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:55:59,270][626795] Updated weights for policy 0, policy_version 264522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:01,265][626795] Updated weights for policy 0, policy_version 264532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:03,207][626795] Updated weights for policy 0, policy_version 264542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:03,983][24592] Fps is (10 sec: 41749.0, 60 sec: 41364.5, 300 sec: 41375.5). Total num frames: 2167152640. Throughput: 0: 10382.0. Samples: 291776994. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:03,985][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:05,204][626795] Updated weights for policy 0, policy_version 264552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:07,183][626795] Updated weights for policy 0, policy_version 264562 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:08,975][24592] Fps is (10 sec: 42600.4, 60 sec: 41506.3, 300 sec: 41404.3). Total num frames: 2167365632. Throughput: 0: 10371.8. Samples: 291839328. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:08,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:09,119][626795] Updated weights for policy 0, policy_version 264572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:11,171][626795] Updated weights for policy 0, policy_version 264582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:13,074][626795] Updated weights for policy 0, policy_version 264592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:13,976][24592] Fps is (10 sec: 41809.7, 60 sec: 41506.1, 300 sec: 41404.3). Total num frames: 2167570432. Throughput: 0: 10364.8. Samples: 291870378. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:13,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:15,094][626795] Updated weights for policy 0, policy_version 264602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:17,068][626795] Updated weights for policy 0, policy_version 264612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:18,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41506.7, 300 sec: 41404.4). Total num frames: 2167775232. Throughput: 0: 10323.7. Samples: 291931440. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:18,976][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:19,029][626795] Updated weights for policy 0, policy_version 264622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:21,033][626795] Updated weights for policy 0, policy_version 264632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:23,009][626795] Updated weights for policy 0, policy_version 264642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:23,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.0, 300 sec: 41432.2). Total num frames: 2167988224. Throughput: 0: 10319.1. Samples: 291994014. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:23,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:25,050][626795] Updated weights for policy 0, policy_version 264652 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:26,949][626795] Updated weights for policy 0, policy_version 264662 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:28,926][626795] Updated weights for policy 0, policy_version 264672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:28,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41506.2, 300 sec: 41432.1). Total num frames: 2168193024. Throughput: 0: 10341.1. Samples: 292025184. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:28,976][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:30,818][626795] Updated weights for policy 0, policy_version 264682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:32,919][626795] Updated weights for policy 0, policy_version 264692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2168406016. Throughput: 0: 10362.8. Samples: 292088076. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:33,978][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:34,754][626795] Updated weights for policy 0, policy_version 264702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:36,683][626795] Updated weights for policy 0, policy_version 264712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:38,610][626795] Updated weights for policy 0, policy_version 264722 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2168610816. Throughput: 0: 10408.7. Samples: 292151286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:38,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:40,635][626795] Updated weights for policy 0, policy_version 264732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:42,665][626795] Updated weights for policy 0, policy_version 264742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:43,976][24592] Fps is (10 sec: 40959.7, 60 sec: 41506.0, 300 sec: 41404.3). Total num frames: 2168815616. Throughput: 0: 10401.5. Samples: 292181916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:43,978][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:44,760][626795] Updated weights for policy 0, policy_version 264752 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:46,653][626795] Updated weights for policy 0, policy_version 264762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:48,610][626795] Updated weights for policy 0, policy_version 264772 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:48,976][24592] Fps is (10 sec: 40958.9, 60 sec: 41506.0, 300 sec: 41404.3). Total num frames: 2169020416. Throughput: 0: 10377.9. Samples: 292243926. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:48,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:50,596][626795] Updated weights for policy 0, policy_version 264782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:52,568][626795] Updated weights for policy 0, policy_version 264792 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:53,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41506.2, 300 sec: 41432.1). Total num frames: 2169225216. Throughput: 0: 10359.6. Samples: 292305510. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:53,976][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:54,522][626795] Updated weights for policy 0, policy_version 264802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:56,573][626795] Updated weights for policy 0, policy_version 264812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:56:58,606][626795] Updated weights for policy 0, policy_version 264822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:58,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41642.7, 300 sec: 41432.0). Total num frames: 2169438208. Throughput: 0: 10359.9. Samples: 292336578. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:56:58,978][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:00,449][626795] Updated weights for policy 0, policy_version 264832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:02,511][626795] Updated weights for policy 0, policy_version 264842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:03,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41511.2, 300 sec: 41404.3). Total num frames: 2169643008. Throughput: 0: 10394.1. Samples: 292399176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:03,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000264849_2169643008.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000263636_2159706112.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:04,472][626795] Updated weights for policy 0, policy_version 264852 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:06,445][626795] Updated weights for policy 0, policy_version 264862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:08,368][626795] Updated weights for policy 0, policy_version 264872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:08,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41506.1, 300 sec: 41432.6). Total num frames: 2169856000. Throughput: 0: 10397.2. Samples: 292461888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:08,977][24592] Avg episode reward: [(0, '5.035')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:10,309][626795] Updated weights for policy 0, policy_version 264882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:12,236][626795] Updated weights for policy 0, policy_version 264892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:13,976][24592] Fps is (10 sec: 42596.7, 60 sec: 41642.4, 300 sec: 41459.8). Total num frames: 2170068992. Throughput: 0: 10400.5. Samples: 292493214. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:13,978][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:14,286][626795] Updated weights for policy 0, policy_version 264902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:16,218][626795] Updated weights for policy 0, policy_version 264912 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:18,239][626795] Updated weights for policy 0, policy_version 264922 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:18,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41506.0, 300 sec: 41404.3). Total num frames: 2170265600. Throughput: 0: 10384.4. Samples: 292555374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:18,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:20,214][626795] Updated weights for policy 0, policy_version 264932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:22,260][626795] Updated weights for policy 0, policy_version 264942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:23,981][24592] Fps is (10 sec: 40120.1, 60 sec: 41365.7, 300 sec: 41431.3). Total num frames: 2170470400. Throughput: 0: 10343.1. Samples: 292616784. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:23,982][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:24,198][626795] Updated weights for policy 0, policy_version 264952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:26,333][626795] Updated weights for policy 0, policy_version 264962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:28,067][626795] Updated weights for policy 0, policy_version 264972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:28,977][24592] Fps is (10 sec: 41773.5, 60 sec: 41505.0, 300 sec: 41431.9). Total num frames: 2170683392. Throughput: 0: 10343.8. Samples: 292647402. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:28,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:30,215][626795] Updated weights for policy 0, policy_version 264982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:32,129][626795] Updated weights for policy 0, policy_version 264992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:33,976][24592] Fps is (10 sec: 40982.5, 60 sec: 41233.0, 300 sec: 41404.3). Total num frames: 2170880000. Throughput: 0: 10343.2. Samples: 292709370. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:33,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:34,240][626795] Updated weights for policy 0, policy_version 265002 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:36,061][626795] Updated weights for policy 0, policy_version 265012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:38,047][626795] Updated weights for policy 0, policy_version 265022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:38,975][24592] Fps is (10 sec: 40966.3, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2171092992. Throughput: 0: 10360.9. Samples: 292771752. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:38,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:40,090][626795] Updated weights for policy 0, policy_version 265032 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:42,074][626795] Updated weights for policy 0, policy_version 265042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:43,934][626795] Updated weights for policy 0, policy_version 265052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:43,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41506.2, 300 sec: 41459.9). Total num frames: 2171305984. Throughput: 0: 10372.5. Samples: 292803336. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:43,976][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:45,874][626795] Updated weights for policy 0, policy_version 265062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:47,870][626795] Updated weights for policy 0, policy_version 265072 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.3, 300 sec: 41432.1). Total num frames: 2171510784. Throughput: 0: 10371.2. Samples: 292865880. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:48,976][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:49,975][626795] Updated weights for policy 0, policy_version 265082 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:51,877][626795] Updated weights for policy 0, policy_version 265092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:53,886][626795] Updated weights for policy 0, policy_version 265102 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:53,976][24592] Fps is (10 sec: 40958.1, 60 sec: 41505.8, 300 sec: 41432.0). Total num frames: 2171715584. Throughput: 0: 10338.6. Samples: 292927128. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:53,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:55,870][626795] Updated weights for policy 0, policy_version 265112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:57,777][626795] Updated weights for policy 0, policy_version 265122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:58,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41369.9, 300 sec: 41432.1). Total num frames: 2171920384. Throughput: 0: 10325.2. Samples: 292957842. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:57:58,976][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:57:59,891][626795] Updated weights for policy 0, policy_version 265132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:01,679][626795] Updated weights for policy 0, policy_version 265142 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:03,858][626795] Updated weights for policy 0, policy_version 265152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:03,975][24592] Fps is (10 sec: 40962.1, 60 sec: 41369.7, 300 sec: 41432.1). Total num frames: 2172125184. Throughput: 0: 10333.0. Samples: 293020356. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:03,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:05,667][626795] Updated weights for policy 0, policy_version 265162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:07,713][626795] Updated weights for policy 0, policy_version 265172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:08,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2172338176. Throughput: 0: 10362.0. Samples: 293083014. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:08,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:09,625][626795] Updated weights for policy 0, policy_version 265182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:11,636][626795] Updated weights for policy 0, policy_version 265192 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:13,530][626795] Updated weights for policy 0, policy_version 265202 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:13,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41233.4, 300 sec: 41459.9). Total num frames: 2172542976. Throughput: 0: 10369.3. Samples: 293114004. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:13,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:15,621][626795] Updated weights for policy 0, policy_version 265212 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:17,485][626795] Updated weights for policy 0, policy_version 265222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41506.3, 300 sec: 41459.9). Total num frames: 2172755968. Throughput: 0: 10396.2. Samples: 293177196. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:18,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:19,519][626795] Updated weights for policy 0, policy_version 265232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:21,550][626795] Updated weights for policy 0, policy_version 265242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:23,507][626795] Updated weights for policy 0, policy_version 265252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41510.0, 300 sec: 41432.2). Total num frames: 2172960768. Throughput: 0: 10373.9. Samples: 293238576. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:23,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:25,521][626795] Updated weights for policy 0, policy_version 265262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:27,410][626795] Updated weights for policy 0, policy_version 265272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:28,977][24592] Fps is (10 sec: 40955.0, 60 sec: 41369.8, 300 sec: 41431.9). Total num frames: 2173165568. Throughput: 0: 10347.9. Samples: 293269002. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:28,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:29,428][626795] Updated weights for policy 0, policy_version 265282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:31,435][626795] Updated weights for policy 0, policy_version 265292 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:33,362][626795] Updated weights for policy 0, policy_version 265302 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.8, 300 sec: 41459.9). Total num frames: 2173378560. Throughput: 0: 10340.5. Samples: 293331204. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:33,976][24592] Avg episode reward: [(0, '5.014')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:35,339][626795] Updated weights for policy 0, policy_version 265312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:37,309][626795] Updated weights for policy 0, policy_version 265322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:38,975][24592] Fps is (10 sec: 41784.4, 60 sec: 41506.2, 300 sec: 41459.9). Total num frames: 2173583360. Throughput: 0: 10378.8. Samples: 293394168. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:38,976][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:39,334][626795] Updated weights for policy 0, policy_version 265332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:41,285][626795] Updated weights for policy 0, policy_version 265342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:43,340][626795] Updated weights for policy 0, policy_version 265352 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:43,976][24592] Fps is (10 sec: 40957.0, 60 sec: 41369.1, 300 sec: 41432.5). Total num frames: 2173788160. Throughput: 0: 10375.4. Samples: 293424744. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:43,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:45,380][626795] Updated weights for policy 0, policy_version 265362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:47,409][626795] Updated weights for policy 0, policy_version 265372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2173992960. Throughput: 0: 10349.2. Samples: 293486070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:48,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:49,222][626795] Updated weights for policy 0, policy_version 265382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:51,249][626795] Updated weights for policy 0, policy_version 265392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:53,281][626795] Updated weights for policy 0, policy_version 265402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:53,976][24592] Fps is (10 sec: 40960.7, 60 sec: 41369.5, 300 sec: 41432.0). Total num frames: 2174197760. Throughput: 0: 10336.5. Samples: 293548164. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:53,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:55,222][626795] Updated weights for policy 0, policy_version 265412 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:57,293][626795] Updated weights for policy 0, policy_version 265422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:58,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41505.9, 300 sec: 41432.1). Total num frames: 2174410752. Throughput: 0: 10329.3. Samples: 293578824. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:58:58,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:58:59,236][626795] Updated weights for policy 0, policy_version 265432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:01,109][626795] Updated weights for policy 0, policy_version 265442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:03,159][626795] Updated weights for policy 0, policy_version 265452 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:03,976][24592] Fps is (10 sec: 41780.9, 60 sec: 41506.0, 300 sec: 41432.1). Total num frames: 2174615552. Throughput: 0: 10314.6. Samples: 293641356. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:03,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000265456_2174615552.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:04,127][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000264243_2164678656.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:05,156][626795] Updated weights for policy 0, policy_version 265462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:07,159][626795] Updated weights for policy 0, policy_version 265472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:08,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2174820352. Throughput: 0: 10330.5. Samples: 293703450. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:08,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:09,107][626795] Updated weights for policy 0, policy_version 265482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:11,177][626795] Updated weights for policy 0, policy_version 265492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:13,103][626795] Updated weights for policy 0, policy_version 265502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:13,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.4, 300 sec: 41432.0). Total num frames: 2175025152. Throughput: 0: 10330.3. Samples: 293733858. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:13,978][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:15,052][626795] Updated weights for policy 0, policy_version 265512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:16,996][626795] Updated weights for policy 0, policy_version 265522 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:18,957][626795] Updated weights for policy 0, policy_version 265532 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41432.2). Total num frames: 2175238144. Throughput: 0: 10338.3. Samples: 293796426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:18,976][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:21,012][626795] Updated weights for policy 0, policy_version 265542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:22,798][626795] Updated weights for policy 0, policy_version 265552 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:23,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2175442944. Throughput: 0: 10332.2. Samples: 293859120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:23,976][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:24,918][626795] Updated weights for policy 0, policy_version 265562 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:26,928][626795] Updated weights for policy 0, policy_version 265572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:28,864][626795] Updated weights for policy 0, policy_version 265582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:28,977][24592] Fps is (10 sec: 40955.5, 60 sec: 41369.7, 300 sec: 41404.2). Total num frames: 2175647744. Throughput: 0: 10321.9. Samples: 293889234. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:28,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:30,924][626795] Updated weights for policy 0, policy_version 265592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:31,113][626772] Signal inference workers to stop experience collection... (3900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:31,116][626772] Signal inference workers to resume experience collection... (3900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:31,126][626795] InferenceWorker_p0-w0: stopping experience collection (3900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:31,133][626795] InferenceWorker_p0-w0: resuming experience collection (3900 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:32,820][626795] Updated weights for policy 0, policy_version 265602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:33,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41232.9, 300 sec: 41432.1). Total num frames: 2175852544. Throughput: 0: 10348.7. Samples: 293951766. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:33,979][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:34,875][626795] Updated weights for policy 0, policy_version 265612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:36,892][626795] Updated weights for policy 0, policy_version 265622 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:38,741][626795] Updated weights for policy 0, policy_version 265632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:38,981][24592] Fps is (10 sec: 41758.8, 60 sec: 41365.5, 300 sec: 41459.0). Total num frames: 2176065536. Throughput: 0: 10337.7. Samples: 294013416. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:38,983][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:40,840][626795] Updated weights for policy 0, policy_version 265642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:42,793][626795] Updated weights for policy 0, policy_version 265652 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:43,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41370.1, 300 sec: 41432.1). Total num frames: 2176270336. Throughput: 0: 10339.9. Samples: 294044118. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:43,978][24592] Avg episode reward: [(0, '5.096')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:44,713][626795] Updated weights for policy 0, policy_version 265662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:46,711][626795] Updated weights for policy 0, policy_version 265672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:48,595][626795] Updated weights for policy 0, policy_version 265682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:48,975][24592] Fps is (10 sec: 40984.1, 60 sec: 41369.5, 300 sec: 41432.1). Total num frames: 2176475136. Throughput: 0: 10342.7. Samples: 294106776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:48,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:50,695][626795] Updated weights for policy 0, policy_version 265692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:52,665][626795] Updated weights for policy 0, policy_version 265702 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:53,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41506.3, 300 sec: 41432.1). Total num frames: 2176688128. Throughput: 0: 10350.3. Samples: 294169218. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:53,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:54,496][626795] Updated weights for policy 0, policy_version 265712 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:56,642][626795] Updated weights for policy 0, policy_version 265722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 15:59:58,622][626795] Updated weights for policy 0, policy_version 265732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.2, 300 sec: 41404.3). Total num frames: 2176884736. Throughput: 0: 10359.3. Samples: 294200022. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 15:59:58,977][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:00,529][626795] Updated weights for policy 0, policy_version 265742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:02,621][626795] Updated weights for policy 0, policy_version 265752 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:03,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41369.7, 300 sec: 41432.1). Total num frames: 2177097728. Throughput: 0: 10338.9. Samples: 294261678. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:03,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:04,641][626795] Updated weights for policy 0, policy_version 265762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:06,541][626795] Updated weights for policy 0, policy_version 265772 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:08,486][626795] Updated weights for policy 0, policy_version 265782 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2177302528. Throughput: 0: 10333.2. Samples: 294324114. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:08,976][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:10,450][626795] Updated weights for policy 0, policy_version 265792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:12,496][626795] Updated weights for policy 0, policy_version 265802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.4, 300 sec: 41460.0). Total num frames: 2177515520. Throughput: 0: 10347.6. Samples: 294354864. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:13,977][24592] Avg episode reward: [(0, '4.259')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:14,513][626795] Updated weights for policy 0, policy_version 265812 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:16,494][626795] Updated weights for policy 0, policy_version 265822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:18,312][626795] Updated weights for policy 0, policy_version 265832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2177720320. Throughput: 0: 10338.3. Samples: 294416988. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:18,976][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:20,396][626795] Updated weights for policy 0, policy_version 265842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:22,313][626795] Updated weights for policy 0, policy_version 265852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:23,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2177925120. Throughput: 0: 10359.1. Samples: 294479514. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:23,978][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:24,447][626795] Updated weights for policy 0, policy_version 265862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:26,195][626795] Updated weights for policy 0, policy_version 265872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:28,247][626795] Updated weights for policy 0, policy_version 265882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:28,976][24592] Fps is (10 sec: 40958.6, 60 sec: 41370.1, 300 sec: 41404.3). Total num frames: 2178129920. Throughput: 0: 10349.4. Samples: 294509844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:28,977][24592] Avg episode reward: [(0, '5.004')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:30,268][626795] Updated weights for policy 0, policy_version 265892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:32,207][626795] Updated weights for policy 0, policy_version 265902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.7, 300 sec: 41376.5). Total num frames: 2178334720. Throughput: 0: 10334.9. Samples: 294571848. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:33,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:34,342][626795] Updated weights for policy 0, policy_version 265912 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:36,360][626795] Updated weights for policy 0, policy_version 265922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:38,108][626795] Updated weights for policy 0, policy_version 265932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:38,976][24592] Fps is (10 sec: 40960.2, 60 sec: 41237.0, 300 sec: 41404.3). Total num frames: 2178539520. Throughput: 0: 10321.6. Samples: 294633690. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:38,988][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:40,263][626795] Updated weights for policy 0, policy_version 265942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:42,129][626795] Updated weights for policy 0, policy_version 265952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.5, 300 sec: 41432.1). Total num frames: 2178752512. Throughput: 0: 10333.6. Samples: 294665034. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:43,976][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:44,239][626795] Updated weights for policy 0, policy_version 265962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:46,218][626795] Updated weights for policy 0, policy_version 265972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:48,120][626795] Updated weights for policy 0, policy_version 265982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:48,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41369.6, 300 sec: 41432.1). Total num frames: 2178957312. Throughput: 0: 10341.9. Samples: 294727062. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:48,979][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:50,189][626795] Updated weights for policy 0, policy_version 265992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:52,116][626795] Updated weights for policy 0, policy_version 266002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:53,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41233.0, 300 sec: 41432.1). Total num frames: 2179162112. Throughput: 0: 10325.0. Samples: 294788742. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:53,977][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:54,045][626795] Updated weights for policy 0, policy_version 266012 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:56,028][626795] Updated weights for policy 0, policy_version 266022 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:00:58,114][626795] Updated weights for policy 0, policy_version 266032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.6, 300 sec: 41405.3). Total num frames: 2179366912. Throughput: 0: 10318.7. Samples: 294819204. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:00:58,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:00,001][626795] Updated weights for policy 0, policy_version 266042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:02,135][626795] Updated weights for policy 0, policy_version 266052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:03,976][24592] Fps is (10 sec: 40960.9, 60 sec: 41233.0, 300 sec: 41376.5). Total num frames: 2179571712. Throughput: 0: 10312.5. Samples: 294881052. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:03,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:04,002][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000266062_2179579904.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:04,024][626795] Updated weights for policy 0, policy_version 266062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:04,080][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000264849_2169643008.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:06,087][626795] Updated weights for policy 0, policy_version 266072 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:08,168][626795] Updated weights for policy 0, policy_version 266082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:08,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41232.9, 300 sec: 41376.5). Total num frames: 2179776512. Throughput: 0: 10279.5. Samples: 294942090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:08,977][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:10,195][626795] Updated weights for policy 0, policy_version 266092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:12,125][626795] Updated weights for policy 0, policy_version 266102 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:13,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41096.5, 300 sec: 41376.6). Total num frames: 2179981312. Throughput: 0: 10285.3. Samples: 294972678. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:13,976][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:14,122][626795] Updated weights for policy 0, policy_version 266112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:16,041][626795] Updated weights for policy 0, policy_version 266122 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:18,074][626795] Updated weights for policy 0, policy_version 266132 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:18,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41232.7, 300 sec: 41376.5). Total num frames: 2180194304. Throughput: 0: 10278.4. Samples: 295034382. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:18,978][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:20,152][626795] Updated weights for policy 0, policy_version 266142 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:22,069][626795] Updated weights for policy 0, policy_version 266152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:23,953][626795] Updated weights for policy 0, policy_version 266162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:23,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41233.0, 300 sec: 41376.5). Total num frames: 2180399104. Throughput: 0: 10304.0. Samples: 295097370. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:23,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:26,012][626795] Updated weights for policy 0, policy_version 266172 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:27,913][626795] Updated weights for policy 0, policy_version 266182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:28,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41232.8, 300 sec: 41348.7). Total num frames: 2180603904. Throughput: 0: 10290.8. Samples: 295128126. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:28,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:30,027][626795] Updated weights for policy 0, policy_version 266192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:31,912][626795] Updated weights for policy 0, policy_version 266202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:33,941][626795] Updated weights for policy 0, policy_version 266212 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:33,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41233.0, 300 sec: 41348.7). Total num frames: 2180808704. Throughput: 0: 10276.2. Samples: 295189494. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:33,976][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:35,881][626795] Updated weights for policy 0, policy_version 266222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:37,969][626795] Updated weights for policy 0, policy_version 266232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:38,976][24592] Fps is (10 sec: 40961.2, 60 sec: 41233.0, 300 sec: 41348.7). Total num frames: 2181013504. Throughput: 0: 10282.1. Samples: 295251438. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:38,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:40,003][626795] Updated weights for policy 0, policy_version 266242 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:41,893][626795] Updated weights for policy 0, policy_version 266252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:43,859][626795] Updated weights for policy 0, policy_version 266262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:43,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41096.6, 300 sec: 41348.8). Total num frames: 2181218304. Throughput: 0: 10276.0. Samples: 295281624. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:43,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:45,995][626795] Updated weights for policy 0, policy_version 266272 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:48,005][626795] Updated weights for policy 0, policy_version 266282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:48,975][24592] Fps is (10 sec: 40961.8, 60 sec: 41096.6, 300 sec: 41348.8). Total num frames: 2181423104. Throughput: 0: 10267.1. Samples: 295343070. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:48,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:49,987][626795] Updated weights for policy 0, policy_version 266292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:52,017][626795] Updated weights for policy 0, policy_version 266302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:53,829][626795] Updated weights for policy 0, policy_version 266312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:53,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41096.8, 300 sec: 41321.1). Total num frames: 2181627904. Throughput: 0: 10291.4. Samples: 295405200. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:53,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:55,861][626795] Updated weights for policy 0, policy_version 266322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:57,853][626795] Updated weights for policy 0, policy_version 266332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.6, 300 sec: 41321.0). Total num frames: 2181832704. Throughput: 0: 10292.1. Samples: 295435824. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:01:58,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:01:59,860][626795] Updated weights for policy 0, policy_version 266342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:01,793][626795] Updated weights for policy 0, policy_version 266352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:03,781][626795] Updated weights for policy 0, policy_version 266362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:03,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41096.7, 300 sec: 41293.2). Total num frames: 2182037504. Throughput: 0: 10311.6. Samples: 295498398. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:03,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:05,754][626795] Updated weights for policy 0, policy_version 266372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:07,686][626795] Updated weights for policy 0, policy_version 266382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.6, 300 sec: 41265.5). Total num frames: 2182242304. Throughput: 0: 10277.1. Samples: 295559838. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:08,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:09,759][626795] Updated weights for policy 0, policy_version 266392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:11,682][626795] Updated weights for policy 0, policy_version 266402 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:13,708][626795] Updated weights for policy 0, policy_version 266412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2182455296. Throughput: 0: 10277.8. Samples: 295590618. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:13,976][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:15,772][626795] Updated weights for policy 0, policy_version 266422 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:17,720][626795] Updated weights for policy 0, policy_version 266432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.9, 300 sec: 41321.8). Total num frames: 2182660096. Throughput: 0: 10297.1. Samples: 295652862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:18,977][24592] Avg episode reward: [(0, '5.000')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:19,716][626795] Updated weights for policy 0, policy_version 266442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:21,701][626795] Updated weights for policy 0, policy_version 266452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:23,639][626795] Updated weights for policy 0, policy_version 266462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:23,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41096.7, 300 sec: 41293.4). Total num frames: 2182864896. Throughput: 0: 10306.8. Samples: 295715238. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:23,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:25,717][626795] Updated weights for policy 0, policy_version 266472 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:27,475][626795] Updated weights for policy 0, policy_version 266482 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.6, 300 sec: 41348.8). Total num frames: 2183077888. Throughput: 0: 10321.5. Samples: 295746090. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:28,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:29,570][626795] Updated weights for policy 0, policy_version 266492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:31,502][626795] Updated weights for policy 0, policy_version 266502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:33,480][626795] Updated weights for policy 0, policy_version 266512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41233.2, 300 sec: 41321.0). Total num frames: 2183282688. Throughput: 0: 10343.2. Samples: 295808514. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:33,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:35,443][626795] Updated weights for policy 0, policy_version 266522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:37,345][626795] Updated weights for policy 0, policy_version 266532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:38,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.3, 300 sec: 41293.2). Total num frames: 2183487488. Throughput: 0: 10354.8. Samples: 295871166. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:38,976][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:39,433][626795] Updated weights for policy 0, policy_version 266542 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:41,466][626795] Updated weights for policy 0, policy_version 266552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:43,418][626795] Updated weights for policy 0, policy_version 266562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:43,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41233.0, 300 sec: 41293.2). Total num frames: 2183692288. Throughput: 0: 10352.0. Samples: 295901664. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:43,977][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:45,570][626795] Updated weights for policy 0, policy_version 266572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:47,459][626795] Updated weights for policy 0, policy_version 266582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:48,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41233.1, 300 sec: 41293.3). Total num frames: 2183897088. Throughput: 0: 10313.5. Samples: 295962504. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:48,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:49,537][626795] Updated weights for policy 0, policy_version 266592 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:51,494][626795] Updated weights for policy 0, policy_version 266602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:53,370][626795] Updated weights for policy 0, policy_version 266612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:53,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41369.2, 300 sec: 41320.9). Total num frames: 2184110080. Throughput: 0: 10332.8. Samples: 296024820. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:53,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:55,490][626795] Updated weights for policy 0, policy_version 266622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:57,406][626795] Updated weights for policy 0, policy_version 266632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.1, 300 sec: 41293.2). Total num frames: 2184306688. Throughput: 0: 10314.0. Samples: 296054748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:02:58,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:02:59,428][626795] Updated weights for policy 0, policy_version 266642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:01,380][626795] Updated weights for policy 0, policy_version 266652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:03,351][626795] Updated weights for policy 0, policy_version 266662 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:03,975][24592] Fps is (10 sec: 40143.2, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2184511488. Throughput: 0: 10315.2. Samples: 296117046. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:03,980][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000266664_2184511488.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:04,065][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000265456_2174615552.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:05,407][626795] Updated weights for policy 0, policy_version 266672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:07,370][626795] Updated weights for policy 0, policy_version 266682 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2184724480. Throughput: 0: 10294.5. Samples: 296178492. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:08,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:09,370][626795] Updated weights for policy 0, policy_version 266692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:11,397][626795] Updated weights for policy 0, policy_version 266702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:13,451][626795] Updated weights for policy 0, policy_version 266712 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:13,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41096.4, 300 sec: 41237.7). Total num frames: 2184921088. Throughput: 0: 10285.4. Samples: 296208936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:13,977][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:15,561][626795] Updated weights for policy 0, policy_version 266722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:17,483][626795] Updated weights for policy 0, policy_version 266732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:18,976][24592] Fps is (10 sec: 40140.2, 60 sec: 41096.4, 300 sec: 41237.7). Total num frames: 2185125888. Throughput: 0: 10245.3. Samples: 296269554. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:18,976][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:19,550][626795] Updated weights for policy 0, policy_version 266742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:21,524][626795] Updated weights for policy 0, policy_version 266752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:23,422][626795] Updated weights for policy 0, policy_version 266762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:23,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41233.1, 300 sec: 41265.6). Total num frames: 2185338880. Throughput: 0: 10228.7. Samples: 296331456. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:23,976][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:25,421][626795] Updated weights for policy 0, policy_version 266772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:27,486][626795] Updated weights for policy 0, policy_version 266782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:28,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41096.3, 300 sec: 41237.7). Total num frames: 2185543680. Throughput: 0: 10238.4. Samples: 296362392. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:28,978][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:29,383][626795] Updated weights for policy 0, policy_version 266792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:31,348][626795] Updated weights for policy 0, policy_version 266802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:33,289][626795] Updated weights for policy 0, policy_version 266812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2185748480. Throughput: 0: 10288.5. Samples: 296425488. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:33,976][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:35,361][626795] Updated weights for policy 0, policy_version 266822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:37,135][626795] Updated weights for policy 0, policy_version 266832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:38,975][24592] Fps is (10 sec: 40961.3, 60 sec: 41096.6, 300 sec: 41237.8). Total num frames: 2185953280. Throughput: 0: 10286.4. Samples: 296487702. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:38,977][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:39,245][626795] Updated weights for policy 0, policy_version 266842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:41,179][626795] Updated weights for policy 0, policy_version 266852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:43,165][626795] Updated weights for policy 0, policy_version 266862 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2186166272. Throughput: 0: 10308.7. Samples: 296518638. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:43,976][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:45,371][626795] Updated weights for policy 0, policy_version 266872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:47,155][626795] Updated weights for policy 0, policy_version 266882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:48,976][24592] Fps is (10 sec: 40958.7, 60 sec: 41096.3, 300 sec: 41237.7). Total num frames: 2186362880. Throughput: 0: 10268.7. Samples: 296579142. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:48,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:49,267][626795] Updated weights for policy 0, policy_version 266892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:51,239][626795] Updated weights for policy 0, policy_version 266902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:53,238][626795] Updated weights for policy 0, policy_version 266912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:53,976][24592] Fps is (10 sec: 40139.6, 60 sec: 40960.2, 300 sec: 41209.9). Total num frames: 2186567680. Throughput: 0: 10279.5. Samples: 296641074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:53,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:55,269][626795] Updated weights for policy 0, policy_version 266922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:57,288][626795] Updated weights for policy 0, policy_version 266932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:58,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2186780672. Throughput: 0: 10284.7. Samples: 296671746. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:03:58,976][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:03:59,232][626795] Updated weights for policy 0, policy_version 266942 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:01,249][626795] Updated weights for policy 0, policy_version 266952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:03,094][626795] Updated weights for policy 0, policy_version 266962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:03,976][24592] Fps is (10 sec: 41776.9, 60 sec: 41232.5, 300 sec: 41237.6). Total num frames: 2186985472. Throughput: 0: 10315.4. Samples: 296733756. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:03,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:05,117][626795] Updated weights for policy 0, policy_version 266972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:07,095][626795] Updated weights for policy 0, policy_version 266982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2187190272. Throughput: 0: 10329.9. Samples: 296796300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:08,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:09,121][626795] Updated weights for policy 0, policy_version 266992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:10,960][626795] Updated weights for policy 0, policy_version 267002 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:13,122][626795] Updated weights for policy 0, policy_version 267012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:13,976][24592] Fps is (10 sec: 41779.8, 60 sec: 41369.2, 300 sec: 41237.6). Total num frames: 2187403264. Throughput: 0: 10316.4. Samples: 296826636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:13,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:15,042][626795] Updated weights for policy 0, policy_version 267022 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:17,149][626795] Updated weights for policy 0, policy_version 267032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:18,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2187599872. Throughput: 0: 10285.3. Samples: 296888328. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:18,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:19,042][626795] Updated weights for policy 0, policy_version 267042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:21,047][626795] Updated weights for policy 0, policy_version 267052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:22,970][626795] Updated weights for policy 0, policy_version 267062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:23,975][24592] Fps is (10 sec: 40963.0, 60 sec: 41233.0, 300 sec: 41237.8). Total num frames: 2187812864. Throughput: 0: 10277.6. Samples: 296950194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:23,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:25,155][626795] Updated weights for policy 0, policy_version 267072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:27,037][626795] Updated weights for policy 0, policy_version 267082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:28,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41096.7, 300 sec: 41210.0). Total num frames: 2188009472. Throughput: 0: 10267.1. Samples: 296980656. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:28,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:29,051][626795] Updated weights for policy 0, policy_version 267092 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:30,901][626795] Updated weights for policy 0, policy_version 267102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:32,971][626795] Updated weights for policy 0, policy_version 267112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:33,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.1, 300 sec: 41210.8). Total num frames: 2188222464. Throughput: 0: 10307.7. Samples: 297042984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:33,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:35,005][626795] Updated weights for policy 0, policy_version 267122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:36,911][626795] Updated weights for policy 0, policy_version 267132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:38,839][626795] Updated weights for policy 0, policy_version 267142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:38,976][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.0, 300 sec: 41209.9). Total num frames: 2188427264. Throughput: 0: 10320.3. Samples: 297105486. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:38,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:40,791][626795] Updated weights for policy 0, policy_version 267152 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:42,733][626795] Updated weights for policy 0, policy_version 267162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:43,976][24592] Fps is (10 sec: 40957.6, 60 sec: 41096.2, 300 sec: 41209.9). Total num frames: 2188632064. Throughput: 0: 10335.6. Samples: 297136854. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:43,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:44,721][626795] Updated weights for policy 0, policy_version 267172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:46,695][626795] Updated weights for policy 0, policy_version 267182 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:48,689][626795] Updated weights for policy 0, policy_version 267192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:48,976][24592] Fps is (10 sec: 41776.4, 60 sec: 41369.3, 300 sec: 41209.9). Total num frames: 2188845056. Throughput: 0: 10337.9. Samples: 297198960. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:48,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:50,763][626795] Updated weights for policy 0, policy_version 267202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:52,657][626795] Updated weights for policy 0, policy_version 267212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:53,976][24592] Fps is (10 sec: 41780.8, 60 sec: 41369.7, 300 sec: 41237.7). Total num frames: 2189049856. Throughput: 0: 10329.7. Samples: 297261138. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:53,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:54,652][626795] Updated weights for policy 0, policy_version 267222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:56,763][626795] Updated weights for policy 0, policy_version 267232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:04:58,596][626795] Updated weights for policy 0, policy_version 267242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:58,975][24592] Fps is (10 sec: 40962.7, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2189254656. Throughput: 0: 10330.4. Samples: 297291498. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:04:58,976][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:00,725][626795] Updated weights for policy 0, policy_version 267252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:02,656][626795] Updated weights for policy 0, policy_version 267262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:03,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41233.7, 300 sec: 41209.9). Total num frames: 2189459456. Throughput: 0: 10328.6. Samples: 297353112. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:03,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000267269_2189467648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:04,047][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000266062_2179579904.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:04,624][626795] Updated weights for policy 0, policy_version 267272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:06,665][626795] Updated weights for policy 0, policy_version 267282 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:08,557][626795] Updated weights for policy 0, policy_version 267292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.0, 300 sec: 41182.1). Total num frames: 2189664256. Throughput: 0: 10344.4. Samples: 297415692. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:08,978][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:10,569][626795] Updated weights for policy 0, policy_version 267302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:12,508][626795] Updated weights for policy 0, policy_version 267312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.6, 300 sec: 41209.9). Total num frames: 2189877248. Throughput: 0: 10353.6. Samples: 297446568. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:13,976][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:14,504][626795] Updated weights for policy 0, policy_version 267322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:16,196][626772] Signal inference workers to stop experience collection... (3950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:16,200][626772] Signal inference workers to resume experience collection... (3950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:16,211][626795] InferenceWorker_p0-w0: stopping experience collection (3950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:16,217][626795] InferenceWorker_p0-w0: resuming experience collection (3950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:16,477][626795] Updated weights for policy 0, policy_version 267332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:18,576][626795] Updated weights for policy 0, policy_version 267342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:18,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41369.5, 300 sec: 41209.9). Total num frames: 2190082048. Throughput: 0: 10346.9. Samples: 297508596. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:18,976][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:20,425][626795] Updated weights for policy 0, policy_version 267352 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:22,519][626795] Updated weights for policy 0, policy_version 267362 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:23,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.0, 300 sec: 41210.0). Total num frames: 2190286848. Throughput: 0: 10331.7. Samples: 297570414. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:23,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:24,413][626795] Updated weights for policy 0, policy_version 267372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:26,399][626795] Updated weights for policy 0, policy_version 267382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:28,360][626795] Updated weights for policy 0, policy_version 267392 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:28,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41505.9, 300 sec: 41237.7). Total num frames: 2190499840. Throughput: 0: 10320.6. Samples: 297601278. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:28,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:30,499][626795] Updated weights for policy 0, policy_version 267402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:32,358][626795] Updated weights for policy 0, policy_version 267412 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41369.6, 300 sec: 41237.7). Total num frames: 2190704640. Throughput: 0: 10330.0. Samples: 297663804. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:33,978][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:34,287][626795] Updated weights for policy 0, policy_version 267422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:36,373][626795] Updated weights for policy 0, policy_version 267432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:38,270][626795] Updated weights for policy 0, policy_version 267442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:38,975][24592] Fps is (10 sec: 40961.8, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2190909440. Throughput: 0: 10333.5. Samples: 297726144. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:38,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:40,201][626795] Updated weights for policy 0, policy_version 267452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:42,272][626795] Updated weights for policy 0, policy_version 267462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:43,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41506.4, 300 sec: 41237.7). Total num frames: 2191122432. Throughput: 0: 10346.0. Samples: 297757068. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:43,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:44,124][626795] Updated weights for policy 0, policy_version 267472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:46,220][626795] Updated weights for policy 0, policy_version 267482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:48,140][626795] Updated weights for policy 0, policy_version 267492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41370.0, 300 sec: 41237.7). Total num frames: 2191327232. Throughput: 0: 10360.5. Samples: 297819336. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:48,978][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:50,221][626795] Updated weights for policy 0, policy_version 267502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:52,236][626795] Updated weights for policy 0, policy_version 267512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:53,976][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.7, 300 sec: 41237.7). Total num frames: 2191532032. Throughput: 0: 10313.7. Samples: 297879810. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:53,978][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:54,300][626795] Updated weights for policy 0, policy_version 267522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:56,214][626795] Updated weights for policy 0, policy_version 267532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:05:58,388][626795] Updated weights for policy 0, policy_version 267542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:58,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2191728640. Throughput: 0: 10306.5. Samples: 297910362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:05:58,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:00,146][626795] Updated weights for policy 0, policy_version 267552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:02,237][626795] Updated weights for policy 0, policy_version 267562 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:03,975][24592] Fps is (10 sec: 40141.2, 60 sec: 41233.1, 300 sec: 41210.0). Total num frames: 2191933440. Throughput: 0: 10299.9. Samples: 297972090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:03,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:04,199][626795] Updated weights for policy 0, policy_version 267572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:06,298][626795] Updated weights for policy 0, policy_version 267582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:08,178][626795] Updated weights for policy 0, policy_version 267592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41237.7). Total num frames: 2192146432. Throughput: 0: 10304.7. Samples: 298034124. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:08,976][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:10,135][626795] Updated weights for policy 0, policy_version 267602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:12,041][626795] Updated weights for policy 0, policy_version 267612 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:13,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41232.8, 300 sec: 41209.9). Total num frames: 2192351232. Throughput: 0: 10310.1. Samples: 298065234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:13,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:14,115][626795] Updated weights for policy 0, policy_version 267622 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:16,101][626795] Updated weights for policy 0, policy_version 267632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:17,963][626795] Updated weights for policy 0, policy_version 267642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:18,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.2, 300 sec: 41210.0). Total num frames: 2192556032. Throughput: 0: 10304.9. Samples: 298127526. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:18,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:19,984][626795] Updated weights for policy 0, policy_version 267652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:22,019][626795] Updated weights for policy 0, policy_version 267662 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:23,966][626795] Updated weights for policy 0, policy_version 267672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:23,977][24592] Fps is (10 sec: 41775.2, 60 sec: 41368.7, 300 sec: 41237.6). Total num frames: 2192769024. Throughput: 0: 10303.8. Samples: 298189830. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:23,979][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:25,922][626795] Updated weights for policy 0, policy_version 267682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:27,880][626795] Updated weights for policy 0, policy_version 267692 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:28,978][24592] Fps is (10 sec: 41769.2, 60 sec: 41231.7, 300 sec: 41237.4). Total num frames: 2192973824. Throughput: 0: 10300.8. Samples: 298220628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:28,980][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:29,919][626795] Updated weights for policy 0, policy_version 267702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:31,891][626795] Updated weights for policy 0, policy_version 267712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:33,819][626795] Updated weights for policy 0, policy_version 267722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:33,976][24592] Fps is (10 sec: 40962.9, 60 sec: 41232.6, 300 sec: 41237.7). Total num frames: 2193178624. Throughput: 0: 10293.7. Samples: 298282560. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:33,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:35,859][626795] Updated weights for policy 0, policy_version 267732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:37,792][626795] Updated weights for policy 0, policy_version 267742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:38,975][24592] Fps is (10 sec: 41789.2, 60 sec: 41369.6, 300 sec: 41265.5). Total num frames: 2193391616. Throughput: 0: 10347.9. Samples: 298345464. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:38,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:39,731][626795] Updated weights for policy 0, policy_version 267752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:41,689][626795] Updated weights for policy 0, policy_version 267762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:43,667][626795] Updated weights for policy 0, policy_version 267772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:43,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2193596416. Throughput: 0: 10356.0. Samples: 298376382. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:43,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:45,805][626795] Updated weights for policy 0, policy_version 267782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:47,702][626795] Updated weights for policy 0, policy_version 267792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:48,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2193801216. Throughput: 0: 10357.9. Samples: 298438194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:48,976][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:49,701][626795] Updated weights for policy 0, policy_version 267802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:51,614][626795] Updated weights for policy 0, policy_version 267812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:53,506][626795] Updated weights for policy 0, policy_version 267822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:53,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2194006016. Throughput: 0: 10384.3. Samples: 298501416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:53,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:55,587][626795] Updated weights for policy 0, policy_version 267832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:57,684][626795] Updated weights for policy 0, policy_version 267842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:58,978][24592] Fps is (10 sec: 40951.0, 60 sec: 41368.1, 300 sec: 41265.2). Total num frames: 2194210816. Throughput: 0: 10345.0. Samples: 298530780. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:06:58,979][24592] Avg episode reward: [(0, '4.903')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:06:59,738][626795] Updated weights for policy 0, policy_version 267852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:01,698][626795] Updated weights for policy 0, policy_version 267862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:03,701][626795] Updated weights for policy 0, policy_version 267872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.6, 300 sec: 41265.5). Total num frames: 2194415616. Throughput: 0: 10338.9. Samples: 298592778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:03,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:04,001][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000267874_2194423808.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000266664_2184511488.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:05,687][626795] Updated weights for policy 0, policy_version 267882 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:07,578][626795] Updated weights for policy 0, policy_version 267892 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:08,975][24592] Fps is (10 sec: 40969.0, 60 sec: 41233.1, 300 sec: 41237.7). Total num frames: 2194620416. Throughput: 0: 10315.8. Samples: 298654026. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:08,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:09,616][626795] Updated weights for policy 0, policy_version 267902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:11,677][626795] Updated weights for policy 0, policy_version 267912 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:13,566][626795] Updated weights for policy 0, policy_version 267922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.3, 300 sec: 41237.7). Total num frames: 2194825216. Throughput: 0: 10313.3. Samples: 298684704. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:13,976][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:15,644][626795] Updated weights for policy 0, policy_version 267932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:17,612][626795] Updated weights for policy 0, policy_version 267942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41265.5). Total num frames: 2195038208. Throughput: 0: 10328.7. Samples: 298747344. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:18,977][24592] Avg episode reward: [(0, '5.177')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:19,553][626795] Updated weights for policy 0, policy_version 267952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:21,538][626795] Updated weights for policy 0, policy_version 267962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:23,553][626795] Updated weights for policy 0, policy_version 267972 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41234.0, 300 sec: 41237.7). Total num frames: 2195243008. Throughput: 0: 10309.1. Samples: 298809372. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:23,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:25,456][626795] Updated weights for policy 0, policy_version 267982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:27,399][626795] Updated weights for policy 0, policy_version 267992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:28,976][24592] Fps is (10 sec: 40958.0, 60 sec: 41234.4, 300 sec: 41237.6). Total num frames: 2195447808. Throughput: 0: 10302.3. Samples: 298839990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:28,978][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:29,468][626795] Updated weights for policy 0, policy_version 268002 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:31,530][626795] Updated weights for policy 0, policy_version 268012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:33,439][626795] Updated weights for policy 0, policy_version 268022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:33,975][24592] Fps is (10 sec: 40140.7, 60 sec: 41096.9, 300 sec: 41209.9). Total num frames: 2195644416. Throughput: 0: 10290.4. Samples: 298901262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:33,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:35,593][626795] Updated weights for policy 0, policy_version 268032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:37,429][626795] Updated weights for policy 0, policy_version 268042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:38,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2195857408. Throughput: 0: 10266.9. Samples: 298963428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:38,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:39,455][626795] Updated weights for policy 0, policy_version 268052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:41,472][626795] Updated weights for policy 0, policy_version 268062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:43,317][626795] Updated weights for policy 0, policy_version 268072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:43,976][24592] Fps is (10 sec: 42597.9, 60 sec: 41233.0, 300 sec: 41265.4). Total num frames: 2196070400. Throughput: 0: 10294.1. Samples: 298993992. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:43,978][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:45,359][626795] Updated weights for policy 0, policy_version 268082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:47,351][626795] Updated weights for policy 0, policy_version 268092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:48,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41232.8, 300 sec: 41237.7). Total num frames: 2196275200. Throughput: 0: 10294.7. Samples: 299056044. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:48,978][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:49,360][626795] Updated weights for policy 0, policy_version 268102 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:51,413][626795] Updated weights for policy 0, policy_version 268112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:53,210][626795] Updated weights for policy 0, policy_version 268122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:53,977][24592] Fps is (10 sec: 40955.0, 60 sec: 41232.1, 300 sec: 41265.3). Total num frames: 2196480000. Throughput: 0: 10327.3. Samples: 299118768. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:53,978][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:55,260][626795] Updated weights for policy 0, policy_version 268132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:57,246][626795] Updated weights for policy 0, policy_version 268142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:58,976][24592] Fps is (10 sec: 41780.2, 60 sec: 41371.0, 300 sec: 41293.2). Total num frames: 2196692992. Throughput: 0: 10330.1. Samples: 299149560. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:07:58,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:07:59,228][626795] Updated weights for policy 0, policy_version 268152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:01,184][626795] Updated weights for policy 0, policy_version 268162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:03,250][626795] Updated weights for policy 0, policy_version 268172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:03,975][24592] Fps is (10 sec: 41785.2, 60 sec: 41369.7, 300 sec: 41265.5). Total num frames: 2196897792. Throughput: 0: 10309.9. Samples: 299211288. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:03,976][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:05,262][626795] Updated weights for policy 0, policy_version 268182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:07,273][626795] Updated weights for policy 0, policy_version 268192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:08,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41369.2, 300 sec: 41293.2). Total num frames: 2197102592. Throughput: 0: 10302.9. Samples: 299273010. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:08,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:09,206][626795] Updated weights for policy 0, policy_version 268202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:11,242][626795] Updated weights for policy 0, policy_version 268212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:13,086][626795] Updated weights for policy 0, policy_version 268222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:13,976][24592] Fps is (10 sec: 40955.6, 60 sec: 41368.9, 300 sec: 41293.1). Total num frames: 2197307392. Throughput: 0: 10304.3. Samples: 299303688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:13,979][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:15,230][626795] Updated weights for policy 0, policy_version 268232 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:17,029][626795] Updated weights for policy 0, policy_version 268242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:18,975][24592] Fps is (10 sec: 40962.6, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2197512192. Throughput: 0: 10327.1. Samples: 299365980. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:18,976][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:19,137][626795] Updated weights for policy 0, policy_version 268252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:21,033][626795] Updated weights for policy 0, policy_version 268262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:23,068][626795] Updated weights for policy 0, policy_version 268272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:23,976][24592] Fps is (10 sec: 40963.2, 60 sec: 41232.9, 300 sec: 41265.5). Total num frames: 2197716992. Throughput: 0: 10339.0. Samples: 299428686. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:23,977][24592] Avg episode reward: [(0, '4.971')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:24,970][626795] Updated weights for policy 0, policy_version 268282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:26,976][626795] Updated weights for policy 0, policy_version 268292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:28,896][626795] Updated weights for policy 0, policy_version 268302 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:28,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.9, 300 sec: 41293.2). Total num frames: 2197929984. Throughput: 0: 10346.7. Samples: 299459592. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:28,976][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:30,868][626795] Updated weights for policy 0, policy_version 268312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:32,780][626795] Updated weights for policy 0, policy_version 268322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:33,976][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.0, 300 sec: 41293.2). Total num frames: 2198134784. Throughput: 0: 10377.5. Samples: 299523030. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:33,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:34,910][626795] Updated weights for policy 0, policy_version 268332 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:36,770][626795] Updated weights for policy 0, policy_version 268342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:38,841][626795] Updated weights for policy 0, policy_version 268352 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:38,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.6, 300 sec: 41265.5). Total num frames: 2198339584. Throughput: 0: 10337.4. Samples: 299583936. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:38,978][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:40,816][626795] Updated weights for policy 0, policy_version 268362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:42,712][626795] Updated weights for policy 0, policy_version 268372 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:43,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41233.2, 300 sec: 41293.3). Total num frames: 2198544384. Throughput: 0: 10347.9. Samples: 299615214. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:43,976][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:44,764][626795] Updated weights for policy 0, policy_version 268382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:46,658][626795] Updated weights for policy 0, policy_version 268392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:48,709][626795] Updated weights for policy 0, policy_version 268402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.8, 300 sec: 41321.0). Total num frames: 2198757376. Throughput: 0: 10353.0. Samples: 299677176. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:48,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:50,550][626795] Updated weights for policy 0, policy_version 268412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:52,550][626795] Updated weights for policy 0, policy_version 268422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:53,976][24592] Fps is (10 sec: 42595.5, 60 sec: 41506.6, 300 sec: 41320.9). Total num frames: 2198970368. Throughput: 0: 10386.7. Samples: 299740410. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:53,978][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:54,590][626795] Updated weights for policy 0, policy_version 268432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:56,531][626795] Updated weights for policy 0, policy_version 268442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:08:58,558][626795] Updated weights for policy 0, policy_version 268452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:58,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41369.6, 300 sec: 41321.1). Total num frames: 2199175168. Throughput: 0: 10374.2. Samples: 299770518. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:08:58,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:00,645][626795] Updated weights for policy 0, policy_version 268462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:02,455][626795] Updated weights for policy 0, policy_version 268472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:03,975][24592] Fps is (10 sec: 40963.0, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2199379968. Throughput: 0: 10394.1. Samples: 299833716. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:03,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000268479_2199379968.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000267269_2189467648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:04,498][626795] Updated weights for policy 0, policy_version 268482 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:06,626][626795] Updated weights for policy 0, policy_version 268492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:08,507][626795] Updated weights for policy 0, policy_version 268502 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:08,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41369.6, 300 sec: 41293.3). Total num frames: 2199584768. Throughput: 0: 10346.7. Samples: 299894292. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:08,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:10,576][626795] Updated weights for policy 0, policy_version 268512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:12,476][626795] Updated weights for policy 0, policy_version 268522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:13,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41370.2, 300 sec: 41321.0). Total num frames: 2199789568. Throughput: 0: 10337.6. Samples: 299924784. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:13,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:14,512][626795] Updated weights for policy 0, policy_version 268532 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:16,446][626795] Updated weights for policy 0, policy_version 268542 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:18,461][626795] Updated weights for policy 0, policy_version 268552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:18,976][24592] Fps is (10 sec: 40961.7, 60 sec: 41369.5, 300 sec: 41293.2). Total num frames: 2199994368. Throughput: 0: 10307.6. Samples: 299986872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:18,978][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:20,416][626795] Updated weights for policy 0, policy_version 268562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:22,377][626795] Updated weights for policy 0, policy_version 268572 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:23,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41506.1, 300 sec: 41348.7). Total num frames: 2200207360. Throughput: 0: 10357.1. Samples: 300050010. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:23,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:24,297][626795] Updated weights for policy 0, policy_version 268582 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:26,291][626795] Updated weights for policy 0, policy_version 268592 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:28,287][626795] Updated weights for policy 0, policy_version 268602 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:28,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41369.3, 300 sec: 41320.9). Total num frames: 2200412160. Throughput: 0: 10345.9. Samples: 300080784. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:28,977][24592] Avg episode reward: [(0, '4.419')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:30,251][626795] Updated weights for policy 0, policy_version 268612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:32,231][626795] Updated weights for policy 0, policy_version 268622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:33,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41506.2, 300 sec: 41348.8). Total num frames: 2200625152. Throughput: 0: 10357.9. Samples: 300143280. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:33,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:34,170][626795] Updated weights for policy 0, policy_version 268632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:36,127][626795] Updated weights for policy 0, policy_version 268642 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:38,016][626795] Updated weights for policy 0, policy_version 268652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:38,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41505.7, 300 sec: 41348.7). Total num frames: 2200829952. Throughput: 0: 10358.8. Samples: 300206556. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:38,977][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:40,061][626795] Updated weights for policy 0, policy_version 268662 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:41,870][626795] Updated weights for policy 0, policy_version 268672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:43,857][626795] Updated weights for policy 0, policy_version 268682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:43,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41642.5, 300 sec: 41348.8). Total num frames: 2201042944. Throughput: 0: 10393.1. Samples: 300238206. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:43,976][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:45,754][626795] Updated weights for policy 0, policy_version 268692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:47,800][626795] Updated weights for policy 0, policy_version 268702 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:48,975][24592] Fps is (10 sec: 42601.6, 60 sec: 41642.7, 300 sec: 41376.6). Total num frames: 2201255936. Throughput: 0: 10394.5. Samples: 300301470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:48,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:49,777][626795] Updated weights for policy 0, policy_version 268712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:51,698][626795] Updated weights for policy 0, policy_version 268722 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:53,671][626795] Updated weights for policy 0, policy_version 268732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:53,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41506.5, 300 sec: 41376.5). Total num frames: 2201460736. Throughput: 0: 10420.1. Samples: 300363192. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:53,979][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:55,761][626795] Updated weights for policy 0, policy_version 268742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:57,757][626795] Updated weights for policy 0, policy_version 268752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:58,976][24592] Fps is (10 sec: 40958.6, 60 sec: 41506.0, 300 sec: 41376.5). Total num frames: 2201665536. Throughput: 0: 10411.7. Samples: 300393312. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:09:58,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:09:59,752][626795] Updated weights for policy 0, policy_version 268762 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:01,771][626795] Updated weights for policy 0, policy_version 268772 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:03,696][626795] Updated weights for policy 0, policy_version 268782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:03,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41506.1, 300 sec: 41376.5). Total num frames: 2201870336. Throughput: 0: 10409.1. Samples: 300455280. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:03,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:05,729][626795] Updated weights for policy 0, policy_version 268792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:07,748][626795] Updated weights for policy 0, policy_version 268802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:08,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41506.0, 300 sec: 41348.7). Total num frames: 2202075136. Throughput: 0: 10387.9. Samples: 300517470. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:08,977][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:09,644][626795] Updated weights for policy 0, policy_version 268812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:11,631][626795] Updated weights for policy 0, policy_version 268822 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:13,550][626795] Updated weights for policy 0, policy_version 268832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41506.2, 300 sec: 41348.8). Total num frames: 2202279936. Throughput: 0: 10396.1. Samples: 300548604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:13,978][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:15,682][626795] Updated weights for policy 0, policy_version 268842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:17,640][626795] Updated weights for policy 0, policy_version 268852 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:18,976][24592] Fps is (10 sec: 40961.3, 60 sec: 41505.9, 300 sec: 41348.7). Total num frames: 2202484736. Throughput: 0: 10367.9. Samples: 300609840. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:18,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:19,610][626795] Updated weights for policy 0, policy_version 268862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:21,608][626795] Updated weights for policy 0, policy_version 268872 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:23,586][626795] Updated weights for policy 0, policy_version 268882 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.3, 300 sec: 41348.8). Total num frames: 2202697728. Throughput: 0: 10346.3. Samples: 300672132. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:23,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:25,560][626795] Updated weights for policy 0, policy_version 268892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:27,603][626795] Updated weights for policy 0, policy_version 268902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:28,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.6, 300 sec: 41320.9). Total num frames: 2202894336. Throughput: 0: 10312.9. Samples: 300702288. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:28,977][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:29,654][626795] Updated weights for policy 0, policy_version 268912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:31,568][626795] Updated weights for policy 0, policy_version 268922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:33,453][626795] Updated weights for policy 0, policy_version 268932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:33,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41369.5, 300 sec: 41348.7). Total num frames: 2203107328. Throughput: 0: 10286.6. Samples: 300764370. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:33,978][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:35,540][626795] Updated weights for policy 0, policy_version 268942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:37,615][626795] Updated weights for policy 0, policy_version 268952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:38,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41370.1, 300 sec: 41321.0). Total num frames: 2203312128. Throughput: 0: 10298.1. Samples: 300826608. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:38,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:39,483][626795] Updated weights for policy 0, policy_version 268962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:41,505][626795] Updated weights for policy 0, policy_version 268972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:43,279][626795] Updated weights for policy 0, policy_version 268982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:43,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41233.2, 300 sec: 41321.0). Total num frames: 2203516928. Throughput: 0: 10314.5. Samples: 300857460. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:43,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:45,439][626795] Updated weights for policy 0, policy_version 268992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:47,332][626795] Updated weights for policy 0, policy_version 269002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:48,977][24592] Fps is (10 sec: 40954.3, 60 sec: 41095.5, 300 sec: 41320.8). Total num frames: 2203721728. Throughput: 0: 10331.9. Samples: 300920232. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:48,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:49,347][626795] Updated weights for policy 0, policy_version 269012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:51,359][626795] Updated weights for policy 0, policy_version 269022 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:53,377][626795] Updated weights for policy 0, policy_version 269032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:53,976][24592] Fps is (10 sec: 41776.7, 60 sec: 41232.7, 300 sec: 41376.5). Total num frames: 2203934720. Throughput: 0: 10321.8. Samples: 300981948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:53,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:55,390][626795] Updated weights for policy 0, policy_version 269042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:57,463][626795] Updated weights for policy 0, policy_version 269052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:58,975][24592] Fps is (10 sec: 41784.9, 60 sec: 41233.2, 300 sec: 41376.5). Total num frames: 2204139520. Throughput: 0: 10293.0. Samples: 301011792. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:10:58,976][24592] Avg episode reward: [(0, '5.090')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:10:59,315][626795] Updated weights for policy 0, policy_version 269062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:01,370][626795] Updated weights for policy 0, policy_version 269072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:03,340][626795] Updated weights for policy 0, policy_version 269082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:03,975][24592] Fps is (10 sec: 40142.9, 60 sec: 41096.5, 300 sec: 41321.0). Total num frames: 2204336128. Throughput: 0: 10303.4. Samples: 301073490. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:03,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:04,026][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000269085_2204344320.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:04,142][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000267874_2194423808.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:05,464][626795] Updated weights for policy 0, policy_version 269092 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:07,406][626795] Updated weights for policy 0, policy_version 269102 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:08,975][24592] Fps is (10 sec: 40141.3, 60 sec: 41097.1, 300 sec: 41321.1). Total num frames: 2204540928. Throughput: 0: 10247.3. Samples: 301133262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:08,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:09,648][626795] Updated weights for policy 0, policy_version 269112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:11,502][626795] Updated weights for policy 0, policy_version 269122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:13,490][626795] Updated weights for policy 0, policy_version 269132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:13,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41096.5, 300 sec: 41321.0). Total num frames: 2204745728. Throughput: 0: 10261.2. Samples: 301164036. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:13,978][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:15,525][626795] Updated weights for policy 0, policy_version 269142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:17,305][626795] Updated weights for policy 0, policy_version 269152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.5, 300 sec: 41321.2). Total num frames: 2204958720. Throughput: 0: 10255.4. Samples: 301225860. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:18,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:19,501][626795] Updated weights for policy 0, policy_version 269162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:21,397][626795] Updated weights for policy 0, policy_version 269172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:23,374][626795] Updated weights for policy 0, policy_version 269182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:23,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41096.5, 300 sec: 41321.3). Total num frames: 2205163520. Throughput: 0: 10268.8. Samples: 301288704. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:23,976][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:25,266][626795] Updated weights for policy 0, policy_version 269192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:27,247][626795] Updated weights for policy 0, policy_version 269202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:28,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41233.2, 300 sec: 41321.1). Total num frames: 2205368320. Throughput: 0: 10266.3. Samples: 301319448. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:28,976][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:29,350][626795] Updated weights for policy 0, policy_version 269212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:31,265][626795] Updated weights for policy 0, policy_version 269222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:33,289][626795] Updated weights for policy 0, policy_version 269232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:33,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41096.7, 300 sec: 41293.2). Total num frames: 2205573120. Throughput: 0: 10246.5. Samples: 301381308. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:33,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:35,309][626795] Updated weights for policy 0, policy_version 269242 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:37,251][626795] Updated weights for policy 0, policy_version 269252 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:38,975][24592] Fps is (10 sec: 40961.0, 60 sec: 41096.6, 300 sec: 41293.2). Total num frames: 2205777920. Throughput: 0: 10247.3. Samples: 301443072. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:38,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:39,314][626795] Updated weights for policy 0, policy_version 269262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:41,215][626795] Updated weights for policy 0, policy_version 269272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:43,249][626795] Updated weights for policy 0, policy_version 269282 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2205990912. Throughput: 0: 10275.9. Samples: 301474206. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:43,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:45,157][626795] Updated weights for policy 0, policy_version 269292 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:47,168][626795] Updated weights for policy 0, policy_version 269302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41234.1, 300 sec: 41321.0). Total num frames: 2206195712. Throughput: 0: 10295.8. Samples: 301536798. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:48,976][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:48,989][626795] Updated weights for policy 0, policy_version 269312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:51,131][626795] Updated weights for policy 0, policy_version 269322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:53,073][626795] Updated weights for policy 0, policy_version 269332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:53,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41096.8, 300 sec: 41321.3). Total num frames: 2206400512. Throughput: 0: 10353.0. Samples: 301599150. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:53,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:54,991][626795] Updated weights for policy 0, policy_version 269342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:57,041][626795] Updated weights for policy 0, policy_version 269352 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:11:58,888][626795] Updated weights for policy 0, policy_version 269362 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.2, 300 sec: 41348.8). Total num frames: 2206613504. Throughput: 0: 10354.9. Samples: 301630008. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:11:58,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:00,861][626795] Updated weights for policy 0, policy_version 269372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:03,069][626795] Updated weights for policy 0, policy_version 269382 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2206818304. Throughput: 0: 10352.8. Samples: 301691736. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:03,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:04,925][626795] Updated weights for policy 0, policy_version 269392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:06,800][626795] Updated weights for policy 0, policy_version 269402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:08,836][626795] Updated weights for policy 0, policy_version 269412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:08,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2207023104. Throughput: 0: 10353.3. Samples: 301754604. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:08,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:10,826][626795] Updated weights for policy 0, policy_version 269422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:12,836][626795] Updated weights for policy 0, policy_version 269432 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:13,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2207227904. Throughput: 0: 10354.6. Samples: 301785402. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:13,976][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:14,848][626795] Updated weights for policy 0, policy_version 269442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:16,809][626795] Updated weights for policy 0, policy_version 269452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:18,730][626795] Updated weights for policy 0, policy_version 269462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:18,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41369.4, 300 sec: 41348.7). Total num frames: 2207440896. Throughput: 0: 10357.7. Samples: 301847406. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:18,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:20,757][626795] Updated weights for policy 0, policy_version 269472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:22,717][626795] Updated weights for policy 0, policy_version 269482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:23,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.7, 300 sec: 41348.8). Total num frames: 2207645696. Throughput: 0: 10377.1. Samples: 301910040. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:23,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:24,682][626795] Updated weights for policy 0, policy_version 269492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:26,675][626795] Updated weights for policy 0, policy_version 269502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:28,577][626795] Updated weights for policy 0, policy_version 269512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:28,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41369.8, 300 sec: 41376.6). Total num frames: 2207850496. Throughput: 0: 10369.3. Samples: 301940826. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:28,976][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:30,554][626795] Updated weights for policy 0, policy_version 269522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:32,547][626795] Updated weights for policy 0, policy_version 269532 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:33,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41506.0, 300 sec: 41376.5). Total num frames: 2208063488. Throughput: 0: 10367.2. Samples: 302003322. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:33,976][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:34,632][626795] Updated weights for policy 0, policy_version 269542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:36,578][626795] Updated weights for policy 0, policy_version 269552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:38,458][626795] Updated weights for policy 0, policy_version 269562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:38,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2208268288. Throughput: 0: 10364.3. Samples: 302065542. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:38,976][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:40,477][626795] Updated weights for policy 0, policy_version 269572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:41,602][626772] Signal inference workers to stop experience collection... (4000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:41,602][626772] Signal inference workers to resume experience collection... (4000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:41,618][626795] InferenceWorker_p0-w0: stopping experience collection (4000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:41,620][626795] InferenceWorker_p0-w0: resuming experience collection (4000 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:42,459][626795] Updated weights for policy 0, policy_version 269582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:43,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2208473088. Throughput: 0: 10356.5. Samples: 302096052. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:44,492][626795] Updated weights for policy 0, policy_version 269592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:46,428][626795] Updated weights for policy 0, policy_version 269602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:48,420][626795] Updated weights for policy 0, policy_version 269612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:48,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.6, 300 sec: 41349.0). Total num frames: 2208677888. Throughput: 0: 10354.2. Samples: 302157672. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:48,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:50,506][626795] Updated weights for policy 0, policy_version 269622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:52,428][626795] Updated weights for policy 0, policy_version 269632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:53,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41506.0, 300 sec: 41348.7). Total num frames: 2208890880. Throughput: 0: 10347.0. Samples: 302220222. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:53,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:54,406][626795] Updated weights for policy 0, policy_version 269642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:56,382][626795] Updated weights for policy 0, policy_version 269652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:12:58,318][626795] Updated weights for policy 0, policy_version 269662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:58,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41369.7, 300 sec: 41348.8). Total num frames: 2209095680. Throughput: 0: 10353.4. Samples: 302251302. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:12:58,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:00,230][626795] Updated weights for policy 0, policy_version 269672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:02,323][626795] Updated weights for policy 0, policy_version 269682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:03,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41369.7, 300 sec: 41348.9). Total num frames: 2209300480. Throughput: 0: 10356.7. Samples: 302313456. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:03,978][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:04,035][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000269691_2209308672.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:04,096][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000268479_2199379968.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:04,218][626795] Updated weights for policy 0, policy_version 269692 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:06,322][626795] Updated weights for policy 0, policy_version 269702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:08,339][626795] Updated weights for policy 0, policy_version 269712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:08,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.7, 300 sec: 41348.9). Total num frames: 2209505280. Throughput: 0: 10330.5. Samples: 302374914. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:08,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:10,237][626795] Updated weights for policy 0, policy_version 269722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:12,252][626795] Updated weights for policy 0, policy_version 269732 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:13,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2209710080. Throughput: 0: 10332.9. Samples: 302405808. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:13,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:14,222][626795] Updated weights for policy 0, policy_version 269742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:16,202][626795] Updated weights for policy 0, policy_version 269752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:18,186][626795] Updated weights for policy 0, policy_version 269762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:18,976][24592] Fps is (10 sec: 40958.0, 60 sec: 41233.0, 300 sec: 41348.7). Total num frames: 2209914880. Throughput: 0: 10306.7. Samples: 302467128. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:18,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:20,239][626795] Updated weights for policy 0, policy_version 269772 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:22,128][626795] Updated weights for policy 0, policy_version 269782 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:23,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2210127872. Throughput: 0: 10317.4. Samples: 302529822. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:23,976][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:24,116][626795] Updated weights for policy 0, policy_version 269792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:26,164][626795] Updated weights for policy 0, policy_version 269802 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:28,103][626795] Updated weights for policy 0, policy_version 269812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:28,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41369.0, 300 sec: 41348.7). Total num frames: 2210332672. Throughput: 0: 10323.0. Samples: 302560596. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:28,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:30,038][626795] Updated weights for policy 0, policy_version 269822 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:32,050][626795] Updated weights for policy 0, policy_version 269832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:33,961][626795] Updated weights for policy 0, policy_version 269842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.8, 300 sec: 41376.6). Total num frames: 2210545664. Throughput: 0: 10340.5. Samples: 302622996. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:33,976][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:35,886][626795] Updated weights for policy 0, policy_version 269852 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:37,918][626795] Updated weights for policy 0, policy_version 269862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:38,975][24592] Fps is (10 sec: 41782.5, 60 sec: 41369.6, 300 sec: 41376.5). Total num frames: 2210750464. Throughput: 0: 10349.1. Samples: 302685930. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:38,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:39,928][626795] Updated weights for policy 0, policy_version 269872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:41,944][626795] Updated weights for policy 0, policy_version 269882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:43,881][626795] Updated weights for policy 0, policy_version 269892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:43,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41369.3, 300 sec: 41348.7). Total num frames: 2210955264. Throughput: 0: 10327.1. Samples: 302716026. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:43,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:45,908][626795] Updated weights for policy 0, policy_version 269902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:47,898][626795] Updated weights for policy 0, policy_version 269912 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:48,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.5, 300 sec: 41321.1). Total num frames: 2211160064. Throughput: 0: 10327.4. Samples: 302778192. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:48,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:49,873][626795] Updated weights for policy 0, policy_version 269922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:51,850][626795] Updated weights for policy 0, policy_version 269932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:53,854][626795] Updated weights for policy 0, policy_version 269942 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:53,975][24592] Fps is (10 sec: 41781.3, 60 sec: 41369.9, 300 sec: 41348.8). Total num frames: 2211373056. Throughput: 0: 10347.3. Samples: 302840544. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:53,976][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:55,802][626795] Updated weights for policy 0, policy_version 269952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:57,696][626795] Updated weights for policy 0, policy_version 269962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.5, 300 sec: 41348.8). Total num frames: 2211577856. Throughput: 0: 10333.5. Samples: 302870814. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:13:58,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:13:59,672][626795] Updated weights for policy 0, policy_version 269972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:01,646][626795] Updated weights for policy 0, policy_version 269982 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:03,571][626795] Updated weights for policy 0, policy_version 269992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.5, 300 sec: 41348.9). Total num frames: 2211782656. Throughput: 0: 10375.2. Samples: 302934006. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:03,976][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:05,608][626795] Updated weights for policy 0, policy_version 270002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:07,647][626795] Updated weights for policy 0, policy_version 270012 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2211987456. Throughput: 0: 10375.5. Samples: 302996718. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:08,976][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:09,552][626795] Updated weights for policy 0, policy_version 270022 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:11,559][626795] Updated weights for policy 0, policy_version 270032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:13,618][626795] Updated weights for policy 0, policy_version 270042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:13,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2212192256. Throughput: 0: 10357.4. Samples: 303026670. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:13,978][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:15,708][626795] Updated weights for policy 0, policy_version 270052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:17,600][626795] Updated weights for policy 0, policy_version 270062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:18,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.5, 300 sec: 41348.8). Total num frames: 2212405248. Throughput: 0: 10331.1. Samples: 303087894. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:18,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:19,619][626795] Updated weights for policy 0, policy_version 270072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:21,662][626795] Updated weights for policy 0, policy_version 270082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:23,575][626795] Updated weights for policy 0, policy_version 270092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:23,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2212610048. Throughput: 0: 10313.2. Samples: 303150024. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:23,976][24592] Avg episode reward: [(0, '5.065')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:25,565][626795] Updated weights for policy 0, policy_version 270102 (0.0039)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:27,599][626795] Updated weights for policy 0, policy_version 270112 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41370.2, 300 sec: 41321.0). Total num frames: 2212814848. Throughput: 0: 10329.3. Samples: 303180840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:28,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:29,455][626795] Updated weights for policy 0, policy_version 270122 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:31,422][626795] Updated weights for policy 0, policy_version 270132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:33,461][626795] Updated weights for policy 0, policy_version 270142 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:33,976][24592] Fps is (10 sec: 40957.7, 60 sec: 41232.7, 300 sec: 41321.0). Total num frames: 2213019648. Throughput: 0: 10324.1. Samples: 303242784. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:33,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:35,365][626795] Updated weights for policy 0, policy_version 270152 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:37,403][626795] Updated weights for policy 0, policy_version 270162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2213232640. Throughput: 0: 10348.0. Samples: 303306204. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:38,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:39,358][626795] Updated weights for policy 0, policy_version 270172 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:41,279][626795] Updated weights for policy 0, policy_version 270182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:43,242][626795] Updated weights for policy 0, policy_version 270192 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:43,976][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.1, 300 sec: 41320.9). Total num frames: 2213445632. Throughput: 0: 10368.0. Samples: 303337380. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:43,977][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:45,345][626795] Updated weights for policy 0, policy_version 270202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:47,127][626795] Updated weights for policy 0, policy_version 270212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:48,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2213642240. Throughput: 0: 10331.3. Samples: 303398916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:48,978][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:49,203][626795] Updated weights for policy 0, policy_version 270222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:51,220][626795] Updated weights for policy 0, policy_version 270232 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:53,178][626795] Updated weights for policy 0, policy_version 270242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:53,975][24592] Fps is (10 sec: 40143.1, 60 sec: 41233.1, 300 sec: 41293.3). Total num frames: 2213847040. Throughput: 0: 10327.6. Samples: 303461460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:53,977][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:55,201][626795] Updated weights for policy 0, policy_version 270252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:57,098][626795] Updated weights for policy 0, policy_version 270262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:58,977][24592] Fps is (10 sec: 41774.7, 60 sec: 41368.9, 300 sec: 41320.9). Total num frames: 2214060032. Throughput: 0: 10335.1. Samples: 303491760. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:14:58,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:14:59,160][626795] Updated weights for policy 0, policy_version 270272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:01,088][626795] Updated weights for policy 0, policy_version 270282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:03,004][626795] Updated weights for policy 0, policy_version 270292 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.7, 300 sec: 41321.1). Total num frames: 2214264832. Throughput: 0: 10377.7. Samples: 303554892. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:03,976][24592] Avg episode reward: [(0, '4.961')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000270296_2214264832.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:04,094][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000269085_2204344320.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:05,063][626795] Updated weights for policy 0, policy_version 270302 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:07,090][626795] Updated weights for policy 0, policy_version 270312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:08,889][626795] Updated weights for policy 0, policy_version 270322 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:08,975][24592] Fps is (10 sec: 41783.8, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2214477824. Throughput: 0: 10385.2. Samples: 303617358. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:08,976][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:10,919][626795] Updated weights for policy 0, policy_version 270332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:12,884][626795] Updated weights for policy 0, policy_version 270342 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:13,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.2, 300 sec: 41348.8). Total num frames: 2214682624. Throughput: 0: 10386.3. Samples: 303648222. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:13,977][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:14,952][626795] Updated weights for policy 0, policy_version 270352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:16,830][626795] Updated weights for policy 0, policy_version 270362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:18,886][626795] Updated weights for policy 0, policy_version 270372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:18,976][24592] Fps is (10 sec: 40956.9, 60 sec: 41369.0, 300 sec: 41320.9). Total num frames: 2214887424. Throughput: 0: 10379.4. Samples: 303709860. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:18,978][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:20,830][626795] Updated weights for policy 0, policy_version 270382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:22,820][626795] Updated weights for policy 0, policy_version 270392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41506.1, 300 sec: 41376.6). Total num frames: 2215100416. Throughput: 0: 10347.5. Samples: 303771840. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:23,979][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:24,691][626795] Updated weights for policy 0, policy_version 270402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:26,803][626795] Updated weights for policy 0, policy_version 270412 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:28,776][626795] Updated weights for policy 0, policy_version 270422 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:28,976][24592] Fps is (10 sec: 40961.4, 60 sec: 41369.3, 300 sec: 41321.0). Total num frames: 2215297024. Throughput: 0: 10345.0. Samples: 303802902. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:28,978][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:30,818][626795] Updated weights for policy 0, policy_version 270432 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:32,694][626795] Updated weights for policy 0, policy_version 270442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41506.5, 300 sec: 41348.8). Total num frames: 2215510016. Throughput: 0: 10364.4. Samples: 303865314. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:33,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:34,759][626795] Updated weights for policy 0, policy_version 270452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:36,630][626795] Updated weights for policy 0, policy_version 270462 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:38,657][626795] Updated weights for policy 0, policy_version 270472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:38,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2215714816. Throughput: 0: 10356.4. Samples: 303927498. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:38,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:40,717][626795] Updated weights for policy 0, policy_version 270482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:42,579][626795] Updated weights for policy 0, policy_version 270492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:43,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41370.0, 300 sec: 41376.7). Total num frames: 2215927808. Throughput: 0: 10374.3. Samples: 303958590. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:43,978][24592] Avg episode reward: [(0, '4.884')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:44,473][626795] Updated weights for policy 0, policy_version 270502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:46,622][626795] Updated weights for policy 0, policy_version 270512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:48,627][626795] Updated weights for policy 0, policy_version 270522 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:48,976][24592] Fps is (10 sec: 41775.2, 60 sec: 41505.5, 300 sec: 41348.7). Total num frames: 2216132608. Throughput: 0: 10354.0. Samples: 304020834. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:48,978][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:50,621][626795] Updated weights for policy 0, policy_version 270532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:52,751][626795] Updated weights for policy 0, policy_version 270542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:53,975][24592] Fps is (10 sec: 40140.8, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2216329216. Throughput: 0: 10282.7. Samples: 304080078. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:53,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:54,765][626795] Updated weights for policy 0, policy_version 270552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:56,680][626795] Updated weights for policy 0, policy_version 270562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:15:58,645][626795] Updated weights for policy 0, policy_version 270572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:58,975][24592] Fps is (10 sec: 40144.6, 60 sec: 41233.8, 300 sec: 41348.8). Total num frames: 2216534016. Throughput: 0: 10293.6. Samples: 304111434. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:15:58,978][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:00,726][626795] Updated weights for policy 0, policy_version 270582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:02,647][626795] Updated weights for policy 0, policy_version 270592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:03,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41233.0, 300 sec: 41348.8). Total num frames: 2216738816. Throughput: 0: 10290.6. Samples: 304172928. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:03,976][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:04,588][626795] Updated weights for policy 0, policy_version 270602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:06,552][626795] Updated weights for policy 0, policy_version 270612 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:08,634][626795] Updated weights for policy 0, policy_version 270622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.1, 300 sec: 41376.5). Total num frames: 2216951808. Throughput: 0: 10306.8. Samples: 304235646. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:08,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:10,490][626795] Updated weights for policy 0, policy_version 270632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:12,507][626795] Updated weights for policy 0, policy_version 270642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:13,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41233.1, 300 sec: 41348.8). Total num frames: 2217156608. Throughput: 0: 10304.8. Samples: 304266612. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:13,976][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:14,463][626795] Updated weights for policy 0, policy_version 270652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:16,557][626795] Updated weights for policy 0, policy_version 270662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:18,346][626795] Updated weights for policy 0, policy_version 270672 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:18,977][24592] Fps is (10 sec: 40955.1, 60 sec: 41232.8, 300 sec: 41348.6). Total num frames: 2217361408. Throughput: 0: 10294.5. Samples: 304328580. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:18,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:20,519][626795] Updated weights for policy 0, policy_version 270682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:22,489][626795] Updated weights for policy 0, policy_version 270692 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:23,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41096.4, 300 sec: 41348.8). Total num frames: 2217566208. Throughput: 0: 10275.0. Samples: 304389876. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:23,978][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:24,561][626795] Updated weights for policy 0, policy_version 270702 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:26,516][626795] Updated weights for policy 0, policy_version 270712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:28,425][626795] Updated weights for policy 0, policy_version 270722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:28,975][24592] Fps is (10 sec: 41783.9, 60 sec: 41369.9, 300 sec: 41376.5). Total num frames: 2217779200. Throughput: 0: 10272.1. Samples: 304420836. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:28,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:30,512][626795] Updated weights for policy 0, policy_version 270732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:32,459][626795] Updated weights for policy 0, policy_version 270742 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:33,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41096.6, 300 sec: 41348.8). Total num frames: 2217975808. Throughput: 0: 10255.3. Samples: 304482312. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:33,977][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:34,529][626795] Updated weights for policy 0, policy_version 270752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:36,366][626795] Updated weights for policy 0, policy_version 270762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:38,413][626795] Updated weights for policy 0, policy_version 270772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:38,976][24592] Fps is (10 sec: 40139.6, 60 sec: 41096.3, 300 sec: 41320.9). Total num frames: 2218180608. Throughput: 0: 10332.2. Samples: 304545030. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:38,977][24592] Avg episode reward: [(0, '4.362')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:40,440][626795] Updated weights for policy 0, policy_version 270782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:42,410][626795] Updated weights for policy 0, policy_version 270792 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:43,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41096.4, 300 sec: 41348.7). Total num frames: 2218393600. Throughput: 0: 10313.0. Samples: 304575522. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:43,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:44,348][626795] Updated weights for policy 0, policy_version 270802 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:46,328][626795] Updated weights for policy 0, policy_version 270812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:48,242][626795] Updated weights for policy 0, policy_version 270822 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:48,981][24592] Fps is (10 sec: 41757.3, 60 sec: 41093.3, 300 sec: 41348.0). Total num frames: 2218598400. Throughput: 0: 10330.2. Samples: 304637844. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:48,982][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:50,189][626795] Updated weights for policy 0, policy_version 270832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:52,210][626795] Updated weights for policy 0, policy_version 270842 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:53,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2218803200. Throughput: 0: 10319.5. Samples: 304700022. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:53,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:54,309][626795] Updated weights for policy 0, policy_version 270852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:56,331][626795] Updated weights for policy 0, policy_version 270862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:16:58,293][626795] Updated weights for policy 0, policy_version 270872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:58,975][24592] Fps is (10 sec: 40163.1, 60 sec: 41096.5, 300 sec: 41293.2). Total num frames: 2218999808. Throughput: 0: 10289.4. Samples: 304729638. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:16:58,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:00,397][626795] Updated weights for policy 0, policy_version 270882 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:02,168][626795] Updated weights for policy 0, policy_version 270892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:03,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2219212800. Throughput: 0: 10281.3. Samples: 304791228. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:03,978][24592] Avg episode reward: [(0, '4.904')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000270900_2219212800.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:04,078][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000269691_2209308672.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:04,363][626795] Updated weights for policy 0, policy_version 270902 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:06,392][626795] Updated weights for policy 0, policy_version 270912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:08,314][626795] Updated weights for policy 0, policy_version 270922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:08,976][24592] Fps is (10 sec: 40957.8, 60 sec: 40959.6, 300 sec: 41293.1). Total num frames: 2219409408. Throughput: 0: 10277.4. Samples: 304852362. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:08,978][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:10,300][626795] Updated weights for policy 0, policy_version 270932 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:12,317][626795] Updated weights for policy 0, policy_version 270942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.5, 300 sec: 41293.3). Total num frames: 2219622400. Throughput: 0: 10277.8. Samples: 304883334. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:13,976][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:14,310][626795] Updated weights for policy 0, policy_version 270952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:16,303][626795] Updated weights for policy 0, policy_version 270962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:18,127][626795] Updated weights for policy 0, policy_version 270972 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:18,975][24592] Fps is (10 sec: 42601.0, 60 sec: 41233.9, 300 sec: 41321.0). Total num frames: 2219835392. Throughput: 0: 10302.4. Samples: 304945920. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:18,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:20,237][626795] Updated weights for policy 0, policy_version 270982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:22,216][626795] Updated weights for policy 0, policy_version 270992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2220040192. Throughput: 0: 10270.1. Samples: 305007180. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:23,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:24,264][626795] Updated weights for policy 0, policy_version 271002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:26,197][626795] Updated weights for policy 0, policy_version 271012 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:28,241][626795] Updated weights for policy 0, policy_version 271022 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:28,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40960.0, 300 sec: 41265.5). Total num frames: 2220236800. Throughput: 0: 10255.0. Samples: 305036994. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:28,977][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:30,345][626795] Updated weights for policy 0, policy_version 271032 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:32,318][626795] Updated weights for policy 0, policy_version 271042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.0, 300 sec: 41293.2). Total num frames: 2220449792. Throughput: 0: 10234.9. Samples: 305098356. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:33,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:34,270][626795] Updated weights for policy 0, policy_version 271052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:36,320][626795] Updated weights for policy 0, policy_version 271062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:38,287][626795] Updated weights for policy 0, policy_version 271072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:38,977][24592] Fps is (10 sec: 40954.3, 60 sec: 41095.8, 300 sec: 41265.3). Total num frames: 2220646400. Throughput: 0: 10218.1. Samples: 305159850. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:38,979][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:40,303][626795] Updated weights for policy 0, policy_version 271082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:42,329][626795] Updated weights for policy 0, policy_version 271092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:43,976][24592] Fps is (10 sec: 40139.7, 60 sec: 40959.9, 300 sec: 41265.4). Total num frames: 2220851200. Throughput: 0: 10245.5. Samples: 305190690. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:43,979][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:44,230][626795] Updated weights for policy 0, policy_version 271102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:46,201][626795] Updated weights for policy 0, policy_version 271112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:48,309][626795] Updated weights for policy 0, policy_version 271122 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:48,975][24592] Fps is (10 sec: 40966.2, 60 sec: 40963.9, 300 sec: 41237.8). Total num frames: 2221056000. Throughput: 0: 10248.1. Samples: 305252394. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:48,977][24592] Avg episode reward: [(0, '5.115')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:50,223][626795] Updated weights for policy 0, policy_version 271132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:52,294][626795] Updated weights for policy 0, policy_version 271142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:53,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41096.2, 300 sec: 41265.4). Total num frames: 2221268992. Throughput: 0: 10284.4. Samples: 305315160. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:53,979][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:54,094][626795] Updated weights for policy 0, policy_version 271152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:56,268][626795] Updated weights for policy 0, policy_version 271162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:17:58,298][626795] Updated weights for policy 0, policy_version 271172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:58,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41096.6, 300 sec: 41237.7). Total num frames: 2221465600. Throughput: 0: 10255.1. Samples: 305344812. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:17:58,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:00,246][626795] Updated weights for policy 0, policy_version 271182 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:02,369][626795] Updated weights for policy 0, policy_version 271192 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:03,976][24592] Fps is (10 sec: 40958.7, 60 sec: 41096.0, 300 sec: 41265.3). Total num frames: 2221678592. Throughput: 0: 10213.1. Samples: 305405520. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:03,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:04,248][626795] Updated weights for policy 0, policy_version 271202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:06,239][626795] Updated weights for policy 0, policy_version 271212 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:08,260][626795] Updated weights for policy 0, policy_version 271222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:08,976][24592] Fps is (10 sec: 40956.1, 60 sec: 41096.3, 300 sec: 41237.6). Total num frames: 2221875200. Throughput: 0: 10231.1. Samples: 305467590. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:08,978][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:10,330][626795] Updated weights for policy 0, policy_version 271232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:12,274][626795] Updated weights for policy 0, policy_version 271242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:13,976][24592] Fps is (10 sec: 40140.3, 60 sec: 40959.3, 300 sec: 41237.6). Total num frames: 2222080000. Throughput: 0: 10242.1. Samples: 305497896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:13,980][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:14,262][626795] Updated weights for policy 0, policy_version 271252 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:16,185][626772] Signal inference workers to stop experience collection... (4050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:16,185][626772] Signal inference workers to resume experience collection... (4050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:16,199][626795] InferenceWorker_p0-w0: stopping experience collection (4050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:16,203][626795] InferenceWorker_p0-w0: resuming experience collection (4050 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:16,218][626795] Updated weights for policy 0, policy_version 271262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:18,312][626795] Updated weights for policy 0, policy_version 271272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:18,975][24592] Fps is (10 sec: 40964.0, 60 sec: 40823.5, 300 sec: 41209.9). Total num frames: 2222284800. Throughput: 0: 10247.6. Samples: 305559498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:18,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:20,213][626795] Updated weights for policy 0, policy_version 271282 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:22,130][626795] Updated weights for policy 0, policy_version 271292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:23,975][24592] Fps is (10 sec: 40964.1, 60 sec: 40823.5, 300 sec: 41210.0). Total num frames: 2222489600. Throughput: 0: 10273.0. Samples: 305622120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:23,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:24,239][626795] Updated weights for policy 0, policy_version 271302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:26,128][626795] Updated weights for policy 0, policy_version 271312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:28,118][626795] Updated weights for policy 0, policy_version 271322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40960.0, 300 sec: 41182.1). Total num frames: 2222694400. Throughput: 0: 10260.1. Samples: 305652390. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:28,976][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:30,200][626795] Updated weights for policy 0, policy_version 271332 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:32,369][626795] Updated weights for policy 0, policy_version 271342 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:33,976][24592] Fps is (10 sec: 40958.7, 60 sec: 40823.3, 300 sec: 41182.1). Total num frames: 2222899200. Throughput: 0: 10235.5. Samples: 305712996. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:33,977][24592] Avg episode reward: [(0, '4.966')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:34,304][626795] Updated weights for policy 0, policy_version 271352 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:36,381][626795] Updated weights for policy 0, policy_version 271362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:38,218][626795] Updated weights for policy 0, policy_version 271372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:38,976][24592] Fps is (10 sec: 40960.0, 60 sec: 40961.0, 300 sec: 41182.2). Total num frames: 2223104000. Throughput: 0: 10201.4. Samples: 305774220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:38,978][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:40,257][626795] Updated weights for policy 0, policy_version 271382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:42,209][626795] Updated weights for policy 0, policy_version 271392 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:43,975][24592] Fps is (10 sec: 40961.3, 60 sec: 40960.2, 300 sec: 41182.2). Total num frames: 2223308800. Throughput: 0: 10230.0. Samples: 305805162. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:43,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:44,329][626795] Updated weights for policy 0, policy_version 271402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:46,229][626795] Updated weights for policy 0, policy_version 271412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:48,217][626795] Updated weights for policy 0, policy_version 271422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:48,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40959.9, 300 sec: 41154.4). Total num frames: 2223513600. Throughput: 0: 10253.8. Samples: 305866932. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:48,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:50,202][626795] Updated weights for policy 0, policy_version 271432 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:52,209][626795] Updated weights for policy 0, policy_version 271442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:53,976][24592] Fps is (10 sec: 41777.6, 60 sec: 40960.1, 300 sec: 41182.1). Total num frames: 2223726592. Throughput: 0: 10245.1. Samples: 305928612. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:53,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:54,238][626795] Updated weights for policy 0, policy_version 271452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:56,287][626795] Updated weights for policy 0, policy_version 271462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:18:58,279][626795] Updated weights for policy 0, policy_version 271472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:58,976][24592] Fps is (10 sec: 40956.6, 60 sec: 40959.4, 300 sec: 41154.3). Total num frames: 2223923200. Throughput: 0: 10244.9. Samples: 305958918. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:18:58,978][24592] Avg episode reward: [(0, '4.377')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:00,269][626795] Updated weights for policy 0, policy_version 271482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:02,388][626795] Updated weights for policy 0, policy_version 271492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:03,975][24592] Fps is (10 sec: 39322.8, 60 sec: 40687.5, 300 sec: 41126.6). Total num frames: 2224119808. Throughput: 0: 10223.3. Samples: 306019548. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:03,977][24592] Avg episode reward: [(0, '5.002')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:04,023][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000271500_2224128000.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:04,132][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000270296_2214264832.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:04,379][626795] Updated weights for policy 0, policy_version 271502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:06,476][626795] Updated weights for policy 0, policy_version 271512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:08,403][626795] Updated weights for policy 0, policy_version 271522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:08,975][24592] Fps is (10 sec: 40144.5, 60 sec: 40824.1, 300 sec: 41126.6). Total num frames: 2224324608. Throughput: 0: 10175.7. Samples: 306080028. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:08,976][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:10,522][626795] Updated weights for policy 0, policy_version 271532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:12,463][626795] Updated weights for policy 0, policy_version 271542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40824.1, 300 sec: 41098.8). Total num frames: 2224529408. Throughput: 0: 10178.7. Samples: 306110430. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:13,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:14,408][626795] Updated weights for policy 0, policy_version 271552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:16,498][626795] Updated weights for policy 0, policy_version 271562 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:18,410][626795] Updated weights for policy 0, policy_version 271572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:18,976][24592] Fps is (10 sec: 40959.4, 60 sec: 40823.4, 300 sec: 41098.8). Total num frames: 2224734208. Throughput: 0: 10197.8. Samples: 306171894. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:18,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:20,457][626795] Updated weights for policy 0, policy_version 271582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:22,397][626795] Updated weights for policy 0, policy_version 271592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:23,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40823.4, 300 sec: 41098.8). Total num frames: 2224939008. Throughput: 0: 10219.9. Samples: 306234114. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:23,977][24592] Avg episode reward: [(0, '4.929')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:24,452][626795] Updated weights for policy 0, policy_version 271602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:26,439][626795] Updated weights for policy 0, policy_version 271612 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:28,324][626795] Updated weights for policy 0, policy_version 271622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:28,976][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.0, 300 sec: 41126.7). Total num frames: 2225152000. Throughput: 0: 10217.6. Samples: 306264954. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:28,977][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:30,326][626795] Updated weights for policy 0, policy_version 271632 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:32,289][626795] Updated weights for policy 0, policy_version 271642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:33,976][24592] Fps is (10 sec: 41777.6, 60 sec: 40959.8, 300 sec: 41098.8). Total num frames: 2225356800. Throughput: 0: 10224.7. Samples: 306327048. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:33,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:34,327][626795] Updated weights for policy 0, policy_version 271652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:36,417][626795] Updated weights for policy 0, policy_version 271662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:38,383][626795] Updated weights for policy 0, policy_version 271672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:38,975][24592] Fps is (10 sec: 40141.2, 60 sec: 40823.5, 300 sec: 41043.4). Total num frames: 2225553408. Throughput: 0: 10207.3. Samples: 306387936. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:38,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:40,395][626795] Updated weights for policy 0, policy_version 271682 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:42,333][626795] Updated weights for policy 0, policy_version 271692 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:43,975][24592] Fps is (10 sec: 40962.4, 60 sec: 40960.0, 300 sec: 41098.9). Total num frames: 2225766400. Throughput: 0: 10220.0. Samples: 306418806. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:43,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:44,399][626795] Updated weights for policy 0, policy_version 271702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:46,351][626795] Updated weights for policy 0, policy_version 271712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:48,224][626795] Updated weights for policy 0, policy_version 271722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.1, 300 sec: 41098.8). Total num frames: 2225971200. Throughput: 0: 10247.6. Samples: 306480690. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:48,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:50,248][626795] Updated weights for policy 0, policy_version 271732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:52,203][626795] Updated weights for policy 0, policy_version 271742 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:53,975][24592] Fps is (10 sec: 41778.5, 60 sec: 40960.2, 300 sec: 41099.0). Total num frames: 2226184192. Throughput: 0: 10305.6. Samples: 306543780. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:53,978][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:54,228][626795] Updated weights for policy 0, policy_version 271752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:56,147][626795] Updated weights for policy 0, policy_version 271762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:19:58,149][626795] Updated weights for policy 0, policy_version 271772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:58,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41096.9, 300 sec: 41098.8). Total num frames: 2226388992. Throughput: 0: 10301.1. Samples: 306573984. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:19:58,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:00,108][626795] Updated weights for policy 0, policy_version 271782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:02,453][626795] Updated weights for policy 0, policy_version 271792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:03,976][24592] Fps is (10 sec: 39321.2, 60 sec: 40959.9, 300 sec: 41015.5). Total num frames: 2226577408. Throughput: 0: 10238.9. Samples: 306632646. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:03,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:04,606][626795] Updated weights for policy 0, policy_version 271802 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:06,680][626795] Updated weights for policy 0, policy_version 271812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:08,734][626795] Updated weights for policy 0, policy_version 271822 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:08,975][24592] Fps is (10 sec: 38503.6, 60 sec: 40823.4, 300 sec: 40987.8). Total num frames: 2226774016. Throughput: 0: 10163.4. Samples: 306691464. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:08,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:10,812][626795] Updated weights for policy 0, policy_version 271832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:12,686][626795] Updated weights for policy 0, policy_version 271842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:13,975][24592] Fps is (10 sec: 39322.4, 60 sec: 40686.9, 300 sec: 40960.1). Total num frames: 2226970624. Throughput: 0: 10151.6. Samples: 306721776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:13,978][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:14,821][626795] Updated weights for policy 0, policy_version 271852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:16,755][626795] Updated weights for policy 0, policy_version 271862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:18,766][626795] Updated weights for policy 0, policy_version 271872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40687.0, 300 sec: 40932.2). Total num frames: 2227175424. Throughput: 0: 10131.7. Samples: 306782970. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:18,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:20,739][626795] Updated weights for policy 0, policy_version 271882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:22,780][626795] Updated weights for policy 0, policy_version 271892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:23,975][24592] Fps is (10 sec: 41779.5, 60 sec: 40823.6, 300 sec: 40987.8). Total num frames: 2227388416. Throughput: 0: 10156.7. Samples: 306844986. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:23,976][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:24,751][626795] Updated weights for policy 0, policy_version 271902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:26,768][626795] Updated weights for policy 0, policy_version 271912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:28,691][626795] Updated weights for policy 0, policy_version 271922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:28,976][24592] Fps is (10 sec: 41779.1, 60 sec: 40687.0, 300 sec: 40960.0). Total num frames: 2227593216. Throughput: 0: 10154.9. Samples: 306875778. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:28,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:30,763][626795] Updated weights for policy 0, policy_version 271932 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:32,612][626795] Updated weights for policy 0, policy_version 271942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:33,975][24592] Fps is (10 sec: 41778.8, 60 sec: 40823.8, 300 sec: 40987.8). Total num frames: 2227806208. Throughput: 0: 10166.1. Samples: 306938166. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:33,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:34,583][626795] Updated weights for policy 0, policy_version 271952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:36,715][626795] Updated weights for policy 0, policy_version 271962 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:38,681][626795] Updated weights for policy 0, policy_version 271972 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40823.5, 300 sec: 40932.2). Total num frames: 2228002816. Throughput: 0: 10118.0. Samples: 306999090. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:38,977][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:40,708][626795] Updated weights for policy 0, policy_version 271982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:42,751][626795] Updated weights for policy 0, policy_version 271992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:43,975][24592] Fps is (10 sec: 40141.2, 60 sec: 40686.9, 300 sec: 40932.4). Total num frames: 2228207616. Throughput: 0: 10112.0. Samples: 307029018. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:43,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:44,817][626795] Updated weights for policy 0, policy_version 272002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:46,789][626795] Updated weights for policy 0, policy_version 272012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:48,856][626795] Updated weights for policy 0, policy_version 272022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:48,976][24592] Fps is (10 sec: 40140.4, 60 sec: 40550.3, 300 sec: 40932.2). Total num frames: 2228404224. Throughput: 0: 10153.2. Samples: 307089540. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:48,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:50,863][626795] Updated weights for policy 0, policy_version 272032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:52,862][626795] Updated weights for policy 0, policy_version 272042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:53,975][24592] Fps is (10 sec: 40140.8, 60 sec: 40414.0, 300 sec: 40932.2). Total num frames: 2228609024. Throughput: 0: 10211.2. Samples: 307150968. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:53,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:54,880][626795] Updated weights for policy 0, policy_version 272052 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:56,948][626795] Updated weights for policy 0, policy_version 272062 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:20:58,732][626795] Updated weights for policy 0, policy_version 272072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:58,979][24592] Fps is (10 sec: 40947.5, 60 sec: 40411.9, 300 sec: 40931.8). Total num frames: 2228813824. Throughput: 0: 10217.1. Samples: 307181580. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:20:58,982][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:00,867][626795] Updated weights for policy 0, policy_version 272082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:02,768][626795] Updated weights for policy 0, policy_version 272092 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:03,976][24592] Fps is (10 sec: 41778.0, 60 sec: 40823.4, 300 sec: 40932.2). Total num frames: 2229026816. Throughput: 0: 10240.1. Samples: 307243776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:03,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000272098_2229026816.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:04,115][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000270900_2219212800.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:04,843][626795] Updated weights for policy 0, policy_version 272102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:06,887][626795] Updated weights for policy 0, policy_version 272112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:08,719][626795] Updated weights for policy 0, policy_version 272122 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:08,977][24592] Fps is (10 sec: 41787.2, 60 sec: 40959.2, 300 sec: 40932.0). Total num frames: 2229231616. Throughput: 0: 10231.7. Samples: 307305426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:08,978][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:10,830][626795] Updated weights for policy 0, policy_version 272132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:12,891][626795] Updated weights for policy 0, policy_version 272142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:13,975][24592] Fps is (10 sec: 40141.6, 60 sec: 40960.0, 300 sec: 40904.6). Total num frames: 2229428224. Throughput: 0: 10203.1. Samples: 307334916. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:13,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:14,895][626795] Updated weights for policy 0, policy_version 272152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:16,952][626795] Updated weights for policy 0, policy_version 272162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:18,902][626795] Updated weights for policy 0, policy_version 272172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:18,975][24592] Fps is (10 sec: 40145.8, 60 sec: 40960.0, 300 sec: 40904.5). Total num frames: 2229633024. Throughput: 0: 10169.1. Samples: 307395774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:18,976][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:20,873][626795] Updated weights for policy 0, policy_version 272182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:22,942][626795] Updated weights for policy 0, policy_version 272192 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:23,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40823.5, 300 sec: 40876.7). Total num frames: 2229837824. Throughput: 0: 10194.0. Samples: 307457820. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:23,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:24,994][626795] Updated weights for policy 0, policy_version 272202 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:26,903][626795] Updated weights for policy 0, policy_version 272212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:28,782][626795] Updated weights for policy 0, policy_version 272222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:28,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40823.5, 300 sec: 40904.5). Total num frames: 2230042624. Throughput: 0: 10202.4. Samples: 307488126. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:28,976][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:30,834][626795] Updated weights for policy 0, policy_version 272232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:32,909][626795] Updated weights for policy 0, policy_version 272242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40687.0, 300 sec: 40904.5). Total num frames: 2230247424. Throughput: 0: 10235.1. Samples: 307550118. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:33,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:34,829][626795] Updated weights for policy 0, policy_version 272252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:36,878][626795] Updated weights for policy 0, policy_version 272262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:38,888][626795] Updated weights for policy 0, policy_version 272272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:38,976][24592] Fps is (10 sec: 40958.9, 60 sec: 40823.3, 300 sec: 40876.7). Total num frames: 2230452224. Throughput: 0: 10248.3. Samples: 307612146. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:38,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:40,831][626795] Updated weights for policy 0, policy_version 272282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:42,833][626795] Updated weights for policy 0, policy_version 272292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:43,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40823.4, 300 sec: 40877.5). Total num frames: 2230657024. Throughput: 0: 10243.5. Samples: 307642506. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:43,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:44,861][626795] Updated weights for policy 0, policy_version 272302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:46,930][626795] Updated weights for policy 0, policy_version 272312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:48,976][24592] Fps is (10 sec: 40137.7, 60 sec: 40822.9, 300 sec: 40848.8). Total num frames: 2230853632. Throughput: 0: 10201.0. Samples: 307702830. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:48,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:49,051][626795] Updated weights for policy 0, policy_version 272322 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:50,920][626795] Updated weights for policy 0, policy_version 272332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:52,882][626795] Updated weights for policy 0, policy_version 272342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:53,976][24592] Fps is (10 sec: 40959.6, 60 sec: 40959.8, 300 sec: 40904.4). Total num frames: 2231066624. Throughput: 0: 10195.7. Samples: 307764222. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:53,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:55,003][626795] Updated weights for policy 0, policy_version 272352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:56,950][626795] Updated weights for policy 0, policy_version 272362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:58,975][24592] Fps is (10 sec: 40964.1, 60 sec: 40825.7, 300 sec: 40848.9). Total num frames: 2231263232. Throughput: 0: 10212.0. Samples: 307794456. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:21:58,977][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:21:59,031][626795] Updated weights for policy 0, policy_version 272372 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:00,980][626795] Updated weights for policy 0, policy_version 272382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:02,981][626795] Updated weights for policy 0, policy_version 272392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:03,976][24592] Fps is (10 sec: 40960.3, 60 sec: 40823.6, 300 sec: 40904.5). Total num frames: 2231476224. Throughput: 0: 10236.0. Samples: 307856394. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:03,978][24592] Avg episode reward: [(0, '4.934')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:04,942][626795] Updated weights for policy 0, policy_version 272402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:06,964][626795] Updated weights for policy 0, policy_version 272412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:08,965][626795] Updated weights for policy 0, policy_version 272422 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:08,976][24592] Fps is (10 sec: 41776.3, 60 sec: 40823.9, 300 sec: 40876.6). Total num frames: 2231681024. Throughput: 0: 10229.7. Samples: 307918164. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:08,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:10,875][626795] Updated weights for policy 0, policy_version 272432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:12,868][626795] Updated weights for policy 0, policy_version 272442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:13,976][24592] Fps is (10 sec: 40959.9, 60 sec: 40959.9, 300 sec: 40848.9). Total num frames: 2231885824. Throughput: 0: 10238.5. Samples: 307948860. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:13,977][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:15,020][626795] Updated weights for policy 0, policy_version 272452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:16,875][626795] Updated weights for policy 0, policy_version 272462 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:18,944][626795] Updated weights for policy 0, policy_version 272472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:18,975][24592] Fps is (10 sec: 40962.7, 60 sec: 40960.0, 300 sec: 40848.9). Total num frames: 2232090624. Throughput: 0: 10216.1. Samples: 308009844. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:18,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:21,025][626795] Updated weights for policy 0, policy_version 272482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:22,930][626795] Updated weights for policy 0, policy_version 272492 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:23,975][24592] Fps is (10 sec: 40141.4, 60 sec: 40823.5, 300 sec: 40848.9). Total num frames: 2232287232. Throughput: 0: 10196.5. Samples: 308070984. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:23,976][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:25,000][626795] Updated weights for policy 0, policy_version 272502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:26,984][626795] Updated weights for policy 0, policy_version 272512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:28,931][626795] Updated weights for policy 0, policy_version 272522 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.0, 300 sec: 40848.9). Total num frames: 2232500224. Throughput: 0: 10201.0. Samples: 308101548. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:28,977][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:30,913][626795] Updated weights for policy 0, policy_version 272532 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:33,001][626795] Updated weights for policy 0, policy_version 272542 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:33,981][24592] Fps is (10 sec: 41757.7, 60 sec: 40956.5, 300 sec: 40876.2). Total num frames: 2232705024. Throughput: 0: 10245.5. Samples: 308163918. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:33,982][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:34,855][626795] Updated weights for policy 0, policy_version 272552 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:36,911][626795] Updated weights for policy 0, policy_version 272562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:38,914][626795] Updated weights for policy 0, policy_version 272572 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.2, 300 sec: 40876.7). Total num frames: 2232909824. Throughput: 0: 10251.0. Samples: 308225514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:38,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:40,903][626795] Updated weights for policy 0, policy_version 272582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:42,890][626795] Updated weights for policy 0, policy_version 272592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:43,975][24592] Fps is (10 sec: 40981.3, 60 sec: 40960.1, 300 sec: 40876.7). Total num frames: 2233114624. Throughput: 0: 10273.3. Samples: 308256756. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:43,978][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:44,781][626795] Updated weights for policy 0, policy_version 272602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:46,848][626795] Updated weights for policy 0, policy_version 272612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:48,895][626795] Updated weights for policy 0, policy_version 272622 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:48,976][24592] Fps is (10 sec: 40955.4, 60 sec: 41096.5, 300 sec: 40848.8). Total num frames: 2233319424. Throughput: 0: 10257.2. Samples: 308317980. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:48,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:50,780][626795] Updated weights for policy 0, policy_version 272632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:52,904][626795] Updated weights for policy 0, policy_version 272642 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:53,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40960.1, 300 sec: 40876.7). Total num frames: 2233524224. Throughput: 0: 10247.6. Samples: 308379300. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:53,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:54,971][626795] Updated weights for policy 0, policy_version 272652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:56,883][626795] Updated weights for policy 0, policy_version 272662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:22:58,848][626795] Updated weights for policy 0, policy_version 272672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:58,975][24592] Fps is (10 sec: 40964.5, 60 sec: 41096.5, 300 sec: 40849.0). Total num frames: 2233729024. Throughput: 0: 10239.8. Samples: 308409648. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:22:58,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:00,870][626795] Updated weights for policy 0, policy_version 272682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:02,823][626795] Updated weights for policy 0, policy_version 272692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.1, 300 sec: 40876.8). Total num frames: 2233933824. Throughput: 0: 10270.8. Samples: 308472030. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:03,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:04,045][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000272698_2233942016.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:04,103][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000271500_2224128000.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:04,880][626795] Updated weights for policy 0, policy_version 272702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:06,787][626795] Updated weights for policy 0, policy_version 272712 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:08,870][626795] Updated weights for policy 0, policy_version 272722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.5, 300 sec: 40876.8). Total num frames: 2234138624. Throughput: 0: 10273.5. Samples: 308533290. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:08,977][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:10,798][626795] Updated weights for policy 0, policy_version 272732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:12,859][626795] Updated weights for policy 0, policy_version 272742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40960.1, 300 sec: 40876.7). Total num frames: 2234343424. Throughput: 0: 10283.1. Samples: 308564286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:13,977][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:14,797][626795] Updated weights for policy 0, policy_version 272752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:16,806][626795] Updated weights for policy 0, policy_version 272762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:18,827][626795] Updated weights for policy 0, policy_version 272772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:18,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40960.0, 300 sec: 40876.7). Total num frames: 2234548224. Throughput: 0: 10267.4. Samples: 308625900. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:18,979][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:20,766][626795] Updated weights for policy 0, policy_version 272782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:22,828][626795] Updated weights for policy 0, policy_version 272792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:23,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41233.0, 300 sec: 40904.5). Total num frames: 2234761216. Throughput: 0: 10270.4. Samples: 308687682. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:23,976][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:24,844][626795] Updated weights for policy 0, policy_version 272802 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:26,876][626795] Updated weights for policy 0, policy_version 272812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:28,893][626795] Updated weights for policy 0, policy_version 272822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40959.9, 300 sec: 40876.7). Total num frames: 2234957824. Throughput: 0: 10249.4. Samples: 308717982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:28,976][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:30,801][626795] Updated weights for policy 0, policy_version 272832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:32,847][626795] Updated weights for policy 0, policy_version 272842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41100.0, 300 sec: 40904.5). Total num frames: 2235170816. Throughput: 0: 10254.6. Samples: 308779428. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:33,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:34,686][626795] Updated weights for policy 0, policy_version 272852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:36,863][626795] Updated weights for policy 0, policy_version 272862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:38,799][626795] Updated weights for policy 0, policy_version 272872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:38,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41096.3, 300 sec: 40904.4). Total num frames: 2235375616. Throughput: 0: 10259.3. Samples: 308840970. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:38,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:40,724][626795] Updated weights for policy 0, policy_version 272882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:42,719][626795] Updated weights for policy 0, policy_version 272892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:43,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.5, 300 sec: 40904.5). Total num frames: 2235580416. Throughput: 0: 10289.6. Samples: 308872680. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:43,978][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:44,792][626795] Updated weights for policy 0, policy_version 272902 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:46,597][626795] Updated weights for policy 0, policy_version 272912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:48,686][626795] Updated weights for policy 0, policy_version 272922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:48,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41097.3, 300 sec: 40876.7). Total num frames: 2235785216. Throughput: 0: 10286.8. Samples: 308934936. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:48,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:50,800][626795] Updated weights for policy 0, policy_version 272932 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:52,624][626795] Updated weights for policy 0, policy_version 272942 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:53,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41096.5, 300 sec: 40904.6). Total num frames: 2235990016. Throughput: 0: 10267.0. Samples: 308995308. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:53,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:54,807][626795] Updated weights for policy 0, policy_version 272952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:56,781][626795] Updated weights for policy 0, policy_version 272962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:23:58,718][626795] Updated weights for policy 0, policy_version 272972 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:58,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41096.3, 300 sec: 40932.2). Total num frames: 2236194816. Throughput: 0: 10253.8. Samples: 309025710. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:23:58,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:00,710][626795] Updated weights for policy 0, policy_version 272982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:02,939][626795] Updated weights for policy 0, policy_version 272992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:03,975][24592] Fps is (10 sec: 40141.3, 60 sec: 40960.0, 300 sec: 40904.5). Total num frames: 2236391424. Throughput: 0: 10233.1. Samples: 309086388. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:03,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:04,758][626795] Updated weights for policy 0, policy_version 273002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:06,875][626795] Updated weights for policy 0, policy_version 273012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:08,880][626795] Updated weights for policy 0, policy_version 273022 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:08,976][24592] Fps is (10 sec: 40141.8, 60 sec: 40959.9, 300 sec: 40904.5). Total num frames: 2236596224. Throughput: 0: 10229.7. Samples: 309148020. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:08,979][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:10,794][626795] Updated weights for policy 0, policy_version 273032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:12,858][626795] Updated weights for policy 0, policy_version 273042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:13,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40959.9, 300 sec: 40904.5). Total num frames: 2236801024. Throughput: 0: 10238.7. Samples: 309178722. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:13,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:14,848][626795] Updated weights for policy 0, policy_version 273052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:16,754][626795] Updated weights for policy 0, policy_version 273062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:18,763][626795] Updated weights for policy 0, policy_version 273072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:18,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.6, 300 sec: 40932.3). Total num frames: 2237014016. Throughput: 0: 10248.3. Samples: 309240600. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:18,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:20,735][626795] Updated weights for policy 0, policy_version 273082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:22,777][626795] Updated weights for policy 0, policy_version 273092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:23,976][24592] Fps is (10 sec: 41778.8, 60 sec: 40960.0, 300 sec: 40904.5). Total num frames: 2237218816. Throughput: 0: 10260.8. Samples: 309302706. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:23,977][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:24,787][626795] Updated weights for policy 0, policy_version 273102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:26,757][626795] Updated weights for policy 0, policy_version 273112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:28,819][626795] Updated weights for policy 0, policy_version 273122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:28,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41096.5, 300 sec: 40904.5). Total num frames: 2237423616. Throughput: 0: 10226.7. Samples: 309332880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:28,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:30,828][626795] Updated weights for policy 0, policy_version 273132 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:32,773][626795] Updated weights for policy 0, policy_version 273142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:33,975][24592] Fps is (10 sec: 40141.3, 60 sec: 40823.5, 300 sec: 40904.5). Total num frames: 2237620224. Throughput: 0: 10211.6. Samples: 309394458. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:33,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:34,814][626795] Updated weights for policy 0, policy_version 273152 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:36,730][626795] Updated weights for policy 0, policy_version 273162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:38,690][626795] Updated weights for policy 0, policy_version 273172 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:38,975][24592] Fps is (10 sec: 40960.5, 60 sec: 40960.2, 300 sec: 40904.5). Total num frames: 2237833216. Throughput: 0: 10255.0. Samples: 309456780. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:38,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:40,689][626795] Updated weights for policy 0, policy_version 273182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:42,668][626795] Updated weights for policy 0, policy_version 273192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.0, 300 sec: 40904.5). Total num frames: 2238038016. Throughput: 0: 10252.5. Samples: 309487068. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:43,978][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:44,678][626795] Updated weights for policy 0, policy_version 273202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:46,728][626795] Updated weights for policy 0, policy_version 273212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:48,711][626795] Updated weights for policy 0, policy_version 273222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:48,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40960.0, 300 sec: 40876.7). Total num frames: 2238242816. Throughput: 0: 10268.9. Samples: 309548490. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:48,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:50,641][626795] Updated weights for policy 0, policy_version 273232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:52,616][626795] Updated weights for policy 0, policy_version 273242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:53,981][24592] Fps is (10 sec: 40936.4, 60 sec: 40956.2, 300 sec: 40875.9). Total num frames: 2238447616. Throughput: 0: 10295.1. Samples: 309611358. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:53,982][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:54,628][626795] Updated weights for policy 0, policy_version 273252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:56,615][626795] Updated weights for policy 0, policy_version 273262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:24:58,596][626795] Updated weights for policy 0, policy_version 273272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:58,976][24592] Fps is (10 sec: 40959.9, 60 sec: 40960.1, 300 sec: 40932.2). Total num frames: 2238652416. Throughput: 0: 10277.8. Samples: 309641226. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:24:58,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:00,706][626795] Updated weights for policy 0, policy_version 273282 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:02,660][626795] Updated weights for policy 0, policy_version 273292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:03,976][24592] Fps is (10 sec: 40980.9, 60 sec: 41096.1, 300 sec: 40959.9). Total num frames: 2238857216. Throughput: 0: 10272.2. Samples: 309702858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:03,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:03,993][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000273299_2238865408.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:04,081][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000272098_2229026816.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:04,759][626795] Updated weights for policy 0, policy_version 273302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:06,748][626795] Updated weights for policy 0, policy_version 273312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:08,695][626795] Updated weights for policy 0, policy_version 273322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:08,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41096.6, 300 sec: 40987.8). Total num frames: 2239062016. Throughput: 0: 10239.6. Samples: 309763488. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:08,978][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:10,731][626795] Updated weights for policy 0, policy_version 273332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:12,681][626795] Updated weights for policy 0, policy_version 273342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:13,975][24592] Fps is (10 sec: 40962.7, 60 sec: 41096.6, 300 sec: 40987.8). Total num frames: 2239266816. Throughput: 0: 10242.3. Samples: 309793782. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:13,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:14,693][626795] Updated weights for policy 0, policy_version 273352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:16,715][626795] Updated weights for policy 0, policy_version 273362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:18,672][626795] Updated weights for policy 0, policy_version 273372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:18,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2239471616. Throughput: 0: 10259.1. Samples: 309856116. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:18,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:20,672][626795] Updated weights for policy 0, policy_version 273382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:22,604][626795] Updated weights for policy 0, policy_version 273392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:23,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.1, 300 sec: 40960.0). Total num frames: 2239676416. Throughput: 0: 10247.6. Samples: 309917922. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:23,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:24,640][626795] Updated weights for policy 0, policy_version 273402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:26,591][626795] Updated weights for policy 0, policy_version 273412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:28,574][626795] Updated weights for policy 0, policy_version 273422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:28,976][24592] Fps is (10 sec: 40958.6, 60 sec: 40959.8, 300 sec: 40932.2). Total num frames: 2239881216. Throughput: 0: 10271.5. Samples: 309949290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:28,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:30,681][626795] Updated weights for policy 0, policy_version 273432 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:32,566][626795] Updated weights for policy 0, policy_version 273442 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:33,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41096.4, 300 sec: 40960.0). Total num frames: 2240086016. Throughput: 0: 10261.4. Samples: 310010256. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:33,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:34,680][626795] Updated weights for policy 0, policy_version 273452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:36,625][626795] Updated weights for policy 0, policy_version 273462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:38,564][626795] Updated weights for policy 0, policy_version 273472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:38,975][24592] Fps is (10 sec: 40961.5, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2240290816. Throughput: 0: 10237.0. Samples: 310071966. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:38,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:40,706][626795] Updated weights for policy 0, policy_version 273482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:42,658][626795] Updated weights for policy 0, policy_version 273492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:43,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41096.5, 300 sec: 41015.6). Total num frames: 2240503808. Throughput: 0: 10245.8. Samples: 310102284. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:43,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:44,493][626795] Updated weights for policy 0, policy_version 273502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:46,621][626795] Updated weights for policy 0, policy_version 273512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:48,609][626795] Updated weights for policy 0, policy_version 273522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40960.1, 300 sec: 40987.8). Total num frames: 2240700416. Throughput: 0: 10254.4. Samples: 310164300. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:48,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:50,754][626795] Updated weights for policy 0, policy_version 273532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:52,574][626795] Updated weights for policy 0, policy_version 273542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:53,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41100.5, 300 sec: 41016.0). Total num frames: 2240913408. Throughput: 0: 10269.6. Samples: 310225620. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:53,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:54,609][626795] Updated weights for policy 0, policy_version 273552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:56,597][626795] Updated weights for policy 0, policy_version 273562 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:25:58,531][626795] Updated weights for policy 0, policy_version 273572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.6, 300 sec: 40987.8). Total num frames: 2241118208. Throughput: 0: 10277.6. Samples: 310256274. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:25:58,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:00,632][626795] Updated weights for policy 0, policy_version 273582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:02,614][626795] Updated weights for policy 0, policy_version 273592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:03,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40960.4, 300 sec: 40960.2). Total num frames: 2241314816. Throughput: 0: 10257.2. Samples: 310317690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:03,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:04,642][626795] Updated weights for policy 0, policy_version 273602 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:06,746][626795] Updated weights for policy 0, policy_version 273612 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:08,669][626795] Updated weights for policy 0, policy_version 273622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:08,976][24592] Fps is (10 sec: 40139.9, 60 sec: 40959.8, 300 sec: 40987.8). Total num frames: 2241519616. Throughput: 0: 10241.0. Samples: 310378770. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:08,978][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:10,635][626795] Updated weights for policy 0, policy_version 273632 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:12,679][626795] Updated weights for policy 0, policy_version 273642 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.0, 300 sec: 40987.8). Total num frames: 2241724416. Throughput: 0: 10229.0. Samples: 310409592. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:13,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:14,644][626795] Updated weights for policy 0, policy_version 273652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:16,561][626795] Updated weights for policy 0, policy_version 273662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:18,619][626795] Updated weights for policy 0, policy_version 273672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:18,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40960.0, 300 sec: 40987.8). Total num frames: 2241929216. Throughput: 0: 10240.3. Samples: 310471068. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:18,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:20,616][626795] Updated weights for policy 0, policy_version 273682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:22,550][626795] Updated weights for policy 0, policy_version 273692 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:23,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40959.9, 300 sec: 40987.8). Total num frames: 2242134016. Throughput: 0: 10248.0. Samples: 310533126. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:23,976][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:24,586][626795] Updated weights for policy 0, policy_version 273702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:26,550][626795] Updated weights for policy 0, policy_version 273712 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:28,471][626795] Updated weights for policy 0, policy_version 273722 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:28,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40960.3, 300 sec: 40987.8). Total num frames: 2242338816. Throughput: 0: 10250.3. Samples: 310563546. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:28,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:30,536][626795] Updated weights for policy 0, policy_version 273732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:32,533][626795] Updated weights for policy 0, policy_version 273742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:33,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41096.3, 300 sec: 41015.5). Total num frames: 2242551808. Throughput: 0: 10253.5. Samples: 310625712. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:33,977][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:34,564][626795] Updated weights for policy 0, policy_version 273752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:36,509][626795] Updated weights for policy 0, policy_version 273762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:38,603][626795] Updated weights for policy 0, policy_version 273772 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:38,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41096.5, 300 sec: 41015.5). Total num frames: 2242756608. Throughput: 0: 10253.6. Samples: 310687032. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:38,976][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:40,501][626795] Updated weights for policy 0, policy_version 273782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:42,516][626795] Updated weights for policy 0, policy_version 273792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:43,976][24592] Fps is (10 sec: 40961.1, 60 sec: 40959.8, 300 sec: 41043.4). Total num frames: 2242961408. Throughput: 0: 10257.0. Samples: 310717842. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:43,977][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:44,502][626795] Updated weights for policy 0, policy_version 273802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:46,547][626795] Updated weights for policy 0, policy_version 273812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:48,370][626795] Updated weights for policy 0, policy_version 273822 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:48,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41096.2, 300 sec: 41015.5). Total num frames: 2243166208. Throughput: 0: 10276.3. Samples: 310780128. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:48,977][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:50,353][626795] Updated weights for policy 0, policy_version 273832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:52,268][626795] Updated weights for policy 0, policy_version 273842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:53,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41096.5, 300 sec: 41071.1). Total num frames: 2243379200. Throughput: 0: 10318.6. Samples: 310843104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:53,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:54,366][626795] Updated weights for policy 0, policy_version 273852 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:56,320][626795] Updated weights for policy 0, policy_version 273862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:26:58,261][626795] Updated weights for policy 0, policy_version 273872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:58,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41096.5, 300 sec: 41043.3). Total num frames: 2243584000. Throughput: 0: 10311.5. Samples: 310873608. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:26:58,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:00,192][626795] Updated weights for policy 0, policy_version 273882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:02,303][626795] Updated weights for policy 0, policy_version 273892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:03,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41369.5, 300 sec: 41071.2). Total num frames: 2243796992. Throughput: 0: 10333.0. Samples: 310936056. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:03,976][24592] Avg episode reward: [(0, '4.299')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000273901_2243796992.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:04,053][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000272698_2233942016.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:04,204][626795] Updated weights for policy 0, policy_version 273902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:06,240][626795] Updated weights for policy 0, policy_version 273912 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:08,282][626795] Updated weights for policy 0, policy_version 273922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:08,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41233.1, 300 sec: 41043.3). Total num frames: 2243993600. Throughput: 0: 10304.9. Samples: 310996848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:08,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:10,304][626795] Updated weights for policy 0, policy_version 273932 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:12,309][626795] Updated weights for policy 0, policy_version 273942 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:13,976][24592] Fps is (10 sec: 40140.6, 60 sec: 41232.9, 300 sec: 41043.3). Total num frames: 2244198400. Throughput: 0: 10308.8. Samples: 311027442. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:13,976][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:14,242][626795] Updated weights for policy 0, policy_version 273952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:15,081][626772] Signal inference workers to stop experience collection... (4100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:15,082][626772] Signal inference workers to resume experience collection... (4100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:15,089][626795] InferenceWorker_p0-w0: stopping experience collection (4100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:15,095][626795] InferenceWorker_p0-w0: resuming experience collection (4100 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:16,249][626795] Updated weights for policy 0, policy_version 273962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:18,268][626795] Updated weights for policy 0, policy_version 273972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:18,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41233.1, 300 sec: 41071.1). Total num frames: 2244403200. Throughput: 0: 10296.7. Samples: 311089056. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:18,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:20,256][626795] Updated weights for policy 0, policy_version 273982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:22,187][626795] Updated weights for policy 0, policy_version 273992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:23,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41369.4, 300 sec: 41071.0). Total num frames: 2244616192. Throughput: 0: 10332.5. Samples: 311151996. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:23,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:24,127][626795] Updated weights for policy 0, policy_version 274002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:26,078][626795] Updated weights for policy 0, policy_version 274012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:28,018][626795] Updated weights for policy 0, policy_version 274022 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:28,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.5, 300 sec: 41071.8). Total num frames: 2244820992. Throughput: 0: 10335.0. Samples: 311182914. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:28,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:30,092][626795] Updated weights for policy 0, policy_version 274032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:31,968][626795] Updated weights for policy 0, policy_version 274042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:33,896][626795] Updated weights for policy 0, policy_version 274052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:33,975][24592] Fps is (10 sec: 41780.9, 60 sec: 41370.0, 300 sec: 41098.8). Total num frames: 2245033984. Throughput: 0: 10338.6. Samples: 311245362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:33,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:35,888][626795] Updated weights for policy 0, policy_version 274062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:37,897][626795] Updated weights for policy 0, policy_version 274072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:38,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.6, 300 sec: 41098.8). Total num frames: 2245238784. Throughput: 0: 10335.8. Samples: 311308218. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:38,979][24592] Avg episode reward: [(0, '4.299')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:39,882][626795] Updated weights for policy 0, policy_version 274082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:41,882][626795] Updated weights for policy 0, policy_version 274092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:43,796][626795] Updated weights for policy 0, policy_version 274102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:43,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41369.8, 300 sec: 41099.0). Total num frames: 2245443584. Throughput: 0: 10338.8. Samples: 311338854. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:43,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:45,898][626795] Updated weights for policy 0, policy_version 274112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:47,803][626795] Updated weights for policy 0, policy_version 274122 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.9, 300 sec: 41098.8). Total num frames: 2245648384. Throughput: 0: 10312.0. Samples: 311400096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:48,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:49,863][626795] Updated weights for policy 0, policy_version 274132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:51,779][626795] Updated weights for policy 0, policy_version 274142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:53,723][626795] Updated weights for policy 0, policy_version 274152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:53,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.5, 300 sec: 41126.6). Total num frames: 2245861376. Throughput: 0: 10365.2. Samples: 311463282. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:53,981][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:55,746][626795] Updated weights for policy 0, policy_version 274162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:57,732][626795] Updated weights for policy 0, policy_version 274172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:58,976][24592] Fps is (10 sec: 41776.0, 60 sec: 41369.0, 300 sec: 41126.5). Total num frames: 2246066176. Throughput: 0: 10359.6. Samples: 311493630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:27:58,978][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:27:59,676][626795] Updated weights for policy 0, policy_version 274182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:01,660][626795] Updated weights for policy 0, policy_version 274192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:03,543][626795] Updated weights for policy 0, policy_version 274202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:03,976][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41154.4). Total num frames: 2246279168. Throughput: 0: 10391.8. Samples: 311556690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:03,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:05,623][626795] Updated weights for policy 0, policy_version 274212 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:07,469][626795] Updated weights for policy 0, policy_version 274222 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:08,976][24592] Fps is (10 sec: 41782.0, 60 sec: 41506.1, 300 sec: 41154.3). Total num frames: 2246483968. Throughput: 0: 10392.4. Samples: 311619654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:08,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:09,532][626795] Updated weights for policy 0, policy_version 274232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:11,585][626795] Updated weights for policy 0, policy_version 274242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:13,578][626795] Updated weights for policy 0, policy_version 274252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:13,975][24592] Fps is (10 sec: 40141.2, 60 sec: 41369.7, 300 sec: 41126.6). Total num frames: 2246680576. Throughput: 0: 10354.8. Samples: 311648880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:13,978][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:15,545][626795] Updated weights for policy 0, policy_version 274262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:17,482][626795] Updated weights for policy 0, policy_version 274272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:18,975][24592] Fps is (10 sec: 40961.1, 60 sec: 41506.2, 300 sec: 41126.6). Total num frames: 2246893568. Throughput: 0: 10351.2. Samples: 311711166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:18,976][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:19,569][626795] Updated weights for policy 0, policy_version 274282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:21,501][626795] Updated weights for policy 0, policy_version 274292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:23,484][626795] Updated weights for policy 0, policy_version 274302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:23,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.9, 300 sec: 41154.4). Total num frames: 2247098368. Throughput: 0: 10339.8. Samples: 311773506. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:23,978][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:25,459][626795] Updated weights for policy 0, policy_version 274312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:27,408][626795] Updated weights for policy 0, policy_version 274322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.2, 300 sec: 41154.4). Total num frames: 2247311360. Throughput: 0: 10334.7. Samples: 311803914. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:28,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:29,421][626795] Updated weights for policy 0, policy_version 274332 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:31,412][626795] Updated weights for policy 0, policy_version 274342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:33,327][626795] Updated weights for policy 0, policy_version 274352 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41154.4). Total num frames: 2247516160. Throughput: 0: 10358.8. Samples: 311866242. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:33,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:35,262][626795] Updated weights for policy 0, policy_version 274362 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:37,332][626795] Updated weights for policy 0, policy_version 274372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:38,976][24592] Fps is (10 sec: 40956.4, 60 sec: 41369.1, 300 sec: 41154.3). Total num frames: 2247720960. Throughput: 0: 10325.8. Samples: 311927952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:38,978][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:39,272][626795] Updated weights for policy 0, policy_version 274382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:41,368][626795] Updated weights for policy 0, policy_version 274392 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:43,360][626795] Updated weights for policy 0, policy_version 274402 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:43,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.6, 300 sec: 41154.4). Total num frames: 2247925760. Throughput: 0: 10333.1. Samples: 311958612. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:43,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:45,335][626795] Updated weights for policy 0, policy_version 274412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:47,264][626795] Updated weights for policy 0, policy_version 274422 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:48,975][24592] Fps is (10 sec: 40963.3, 60 sec: 41369.7, 300 sec: 41154.4). Total num frames: 2248130560. Throughput: 0: 10299.1. Samples: 312020148. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:48,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:49,300][626795] Updated weights for policy 0, policy_version 274432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:51,175][626795] Updated weights for policy 0, policy_version 274442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:53,214][626795] Updated weights for policy 0, policy_version 274452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:53,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.2, 300 sec: 41154.4). Total num frames: 2248335360. Throughput: 0: 10286.6. Samples: 312082548. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:53,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:55,213][626795] Updated weights for policy 0, policy_version 274462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:57,199][626795] Updated weights for policy 0, policy_version 274472 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41370.2, 300 sec: 41209.9). Total num frames: 2248548352. Throughput: 0: 10327.6. Samples: 312113622. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:28:58,976][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:28:59,175][626795] Updated weights for policy 0, policy_version 274482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:01,176][626795] Updated weights for policy 0, policy_version 274492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:03,048][626795] Updated weights for policy 0, policy_version 274502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:03,982][24592] Fps is (10 sec: 41753.6, 60 sec: 41228.9, 300 sec: 41209.1). Total num frames: 2248753152. Throughput: 0: 10325.7. Samples: 312175884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:03,983][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:04,028][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000274507_2248761344.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:04,136][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000273299_2238865408.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:05,158][626795] Updated weights for policy 0, policy_version 274512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:07,098][626795] Updated weights for policy 0, policy_version 274522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:08,877][626795] Updated weights for policy 0, policy_version 274532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:08,980][24592] Fps is (10 sec: 41762.3, 60 sec: 41367.0, 300 sec: 41237.1). Total num frames: 2248966144. Throughput: 0: 10335.9. Samples: 312238662. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:08,981][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:10,985][626795] Updated weights for policy 0, policy_version 274542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:12,901][626795] Updated weights for policy 0, policy_version 274552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:13,975][24592] Fps is (10 sec: 41804.7, 60 sec: 41506.1, 300 sec: 41209.9). Total num frames: 2249170944. Throughput: 0: 10354.4. Samples: 312269862. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:13,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:14,983][626795] Updated weights for policy 0, policy_version 274562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:16,916][626795] Updated weights for policy 0, policy_version 274572 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:18,936][626795] Updated weights for policy 0, policy_version 274582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:18,975][24592] Fps is (10 sec: 40976.4, 60 sec: 41369.5, 300 sec: 41209.9). Total num frames: 2249375744. Throughput: 0: 10326.2. Samples: 312330924. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:18,977][24592] Avg episode reward: [(0, '5.246')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:20,860][626795] Updated weights for policy 0, policy_version 274592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:22,973][626795] Updated weights for policy 0, policy_version 274602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2249580544. Throughput: 0: 10348.2. Samples: 312393612. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:23,978][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:24,907][626795] Updated weights for policy 0, policy_version 274612 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:26,820][626795] Updated weights for policy 0, policy_version 274622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:28,743][626795] Updated weights for policy 0, policy_version 274632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:28,976][24592] Fps is (10 sec: 41777.6, 60 sec: 41369.3, 300 sec: 41265.4). Total num frames: 2249793536. Throughput: 0: 10355.6. Samples: 312424620. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:28,993][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:30,791][626795] Updated weights for policy 0, policy_version 274642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:32,671][626795] Updated weights for policy 0, policy_version 274652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:33,976][24592] Fps is (10 sec: 41774.8, 60 sec: 41368.8, 300 sec: 41237.5). Total num frames: 2249998336. Throughput: 0: 10380.2. Samples: 312487266. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:33,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:34,791][626795] Updated weights for policy 0, policy_version 274662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:36,690][626795] Updated weights for policy 0, policy_version 274672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:38,591][626795] Updated weights for policy 0, policy_version 274682 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:38,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41370.2, 300 sec: 41237.7). Total num frames: 2250203136. Throughput: 0: 10384.5. Samples: 312549852. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:38,976][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:40,651][626795] Updated weights for policy 0, policy_version 274692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:42,699][626795] Updated weights for policy 0, policy_version 274702 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:43,976][24592] Fps is (10 sec: 40964.0, 60 sec: 41369.6, 300 sec: 41237.7). Total num frames: 2250407936. Throughput: 0: 10371.0. Samples: 312580320. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:43,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:44,600][626795] Updated weights for policy 0, policy_version 274712 (0.0063)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:46,676][626795] Updated weights for policy 0, policy_version 274722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:48,588][626795] Updated weights for policy 0, policy_version 274732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:48,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41369.3, 300 sec: 41238.4). Total num frames: 2250612736. Throughput: 0: 10353.3. Samples: 312641724. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:48,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:50,555][626795] Updated weights for policy 0, policy_version 274742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:52,599][626795] Updated weights for policy 0, policy_version 274752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:53,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41369.3, 300 sec: 41237.6). Total num frames: 2250817536. Throughput: 0: 10321.5. Samples: 312703092. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:53,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:54,693][626795] Updated weights for policy 0, policy_version 274762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:56,727][626795] Updated weights for policy 0, policy_version 274772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:29:58,617][626795] Updated weights for policy 0, policy_version 274782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:58,975][24592] Fps is (10 sec: 40962.0, 60 sec: 41233.1, 300 sec: 41237.8). Total num frames: 2251022336. Throughput: 0: 10298.5. Samples: 312733296. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:29:58,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:00,538][626795] Updated weights for policy 0, policy_version 274792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:02,591][626795] Updated weights for policy 0, policy_version 274802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:03,975][24592] Fps is (10 sec: 41781.2, 60 sec: 41373.8, 300 sec: 41265.5). Total num frames: 2251235328. Throughput: 0: 10327.7. Samples: 312795672. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:03,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:04,539][626795] Updated weights for policy 0, policy_version 274812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:06,616][626795] Updated weights for policy 0, policy_version 274822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:08,448][626795] Updated weights for policy 0, policy_version 274832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41235.8, 300 sec: 41265.5). Total num frames: 2251440128. Throughput: 0: 10321.5. Samples: 312858078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:08,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:10,497][626795] Updated weights for policy 0, policy_version 274842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:12,570][626795] Updated weights for policy 0, policy_version 274852 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2251644928. Throughput: 0: 10312.2. Samples: 312888666. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:13,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:14,466][626795] Updated weights for policy 0, policy_version 274862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:16,436][626795] Updated weights for policy 0, policy_version 274872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:18,571][626795] Updated weights for policy 0, policy_version 274882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:18,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41233.0, 300 sec: 41265.4). Total num frames: 2251849728. Throughput: 0: 10311.9. Samples: 312951294. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:18,978][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:20,626][626795] Updated weights for policy 0, policy_version 274892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:22,621][626795] Updated weights for policy 0, policy_version 274902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:23,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41232.7, 300 sec: 41265.5). Total num frames: 2252054528. Throughput: 0: 10249.4. Samples: 313011078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:23,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:24,679][626795] Updated weights for policy 0, policy_version 274912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:26,598][626795] Updated weights for policy 0, policy_version 274922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:28,481][626795] Updated weights for policy 0, policy_version 274932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:28,975][24592] Fps is (10 sec: 40141.5, 60 sec: 40960.3, 300 sec: 41237.7). Total num frames: 2252251136. Throughput: 0: 10257.1. Samples: 313041888. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:28,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:30,535][626795] Updated weights for policy 0, policy_version 274942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:32,509][626795] Updated weights for policy 0, policy_version 274952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:33,977][24592] Fps is (10 sec: 40955.4, 60 sec: 41096.2, 300 sec: 41265.2). Total num frames: 2252464128. Throughput: 0: 10259.2. Samples: 313103400. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:33,978][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:34,588][626795] Updated weights for policy 0, policy_version 274962 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:36,527][626795] Updated weights for policy 0, policy_version 274972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:38,520][626795] Updated weights for policy 0, policy_version 274982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:38,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41096.3, 300 sec: 41237.7). Total num frames: 2252668928. Throughput: 0: 10280.7. Samples: 313165722. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:38,976][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:40,422][626795] Updated weights for policy 0, policy_version 274992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:42,407][626795] Updated weights for policy 0, policy_version 275002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:43,981][24592] Fps is (10 sec: 40943.6, 60 sec: 41092.7, 300 sec: 41264.7). Total num frames: 2252873728. Throughput: 0: 10289.1. Samples: 313196364. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:43,982][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:44,460][626795] Updated weights for policy 0, policy_version 275012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:46,388][626795] Updated weights for policy 0, policy_version 275022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:48,430][626795] Updated weights for policy 0, policy_version 275032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:48,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.2, 300 sec: 41265.4). Total num frames: 2253086720. Throughput: 0: 10296.2. Samples: 313259004. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:48,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:50,427][626795] Updated weights for policy 0, policy_version 275042 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:52,463][626795] Updated weights for policy 0, policy_version 275052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:53,975][24592] Fps is (10 sec: 40982.9, 60 sec: 41096.8, 300 sec: 41237.7). Total num frames: 2253283328. Throughput: 0: 10260.8. Samples: 313319814. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:53,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:54,499][626795] Updated weights for policy 0, policy_version 275062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:56,440][626795] Updated weights for policy 0, policy_version 275072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:30:58,337][626795] Updated weights for policy 0, policy_version 275082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:58,975][24592] Fps is (10 sec: 40141.9, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 2253488128. Throughput: 0: 10259.2. Samples: 313350330. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:30:58,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:00,450][626795] Updated weights for policy 0, policy_version 275092 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:02,473][626795] Updated weights for policy 0, policy_version 275102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:03,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.5, 300 sec: 41293.3). Total num frames: 2253701120. Throughput: 0: 10248.0. Samples: 313412454. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:03,976][24592] Avg episode reward: [(0, '4.955')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000275110_2253701120.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:04,055][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000273901_2243796992.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:04,350][626795] Updated weights for policy 0, policy_version 275112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:06,341][626795] Updated weights for policy 0, policy_version 275122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:08,333][626795] Updated weights for policy 0, policy_version 275132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41096.6, 300 sec: 41293.2). Total num frames: 2253905920. Throughput: 0: 10307.1. Samples: 313474890. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:08,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:10,308][626795] Updated weights for policy 0, policy_version 275142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:12,281][626795] Updated weights for policy 0, policy_version 275152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.6, 300 sec: 41293.2). Total num frames: 2254110720. Throughput: 0: 10310.6. Samples: 313505862. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:13,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:14,249][626795] Updated weights for policy 0, policy_version 275162 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:16,195][626795] Updated weights for policy 0, policy_version 275172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:18,141][626795] Updated weights for policy 0, policy_version 275182 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41233.2, 300 sec: 41321.0). Total num frames: 2254323712. Throughput: 0: 10320.2. Samples: 313567794. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:18,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:20,138][626795] Updated weights for policy 0, policy_version 275192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:22,166][626795] Updated weights for policy 0, policy_version 275202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:23,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41096.6, 300 sec: 41293.2). Total num frames: 2254520320. Throughput: 0: 10311.2. Samples: 313629726. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:23,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:24,268][626795] Updated weights for policy 0, policy_version 275212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:26,198][626795] Updated weights for policy 0, policy_version 275222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:28,127][626795] Updated weights for policy 0, policy_version 275232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:28,976][24592] Fps is (10 sec: 40958.6, 60 sec: 41369.4, 300 sec: 41293.3). Total num frames: 2254733312. Throughput: 0: 10291.9. Samples: 313659444. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:28,977][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:30,190][626795] Updated weights for policy 0, policy_version 275242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:32,147][626795] Updated weights for policy 0, policy_version 275252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:33,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41234.2, 300 sec: 41293.3). Total num frames: 2254938112. Throughput: 0: 10284.2. Samples: 313721790. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:33,976][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:34,185][626795] Updated weights for policy 0, policy_version 275262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:36,231][626795] Updated weights for policy 0, policy_version 275272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:38,077][626795] Updated weights for policy 0, policy_version 275282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:38,975][24592] Fps is (10 sec: 40961.5, 60 sec: 41233.3, 300 sec: 41293.3). Total num frames: 2255142912. Throughput: 0: 10320.3. Samples: 313784226. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:38,977][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:40,019][626795] Updated weights for policy 0, policy_version 275292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:42,049][626795] Updated weights for policy 0, policy_version 275302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:43,954][626795] Updated weights for policy 0, policy_version 275312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:43,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41373.5, 300 sec: 41321.1). Total num frames: 2255355904. Throughput: 0: 10319.1. Samples: 313814688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:43,976][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:45,869][626795] Updated weights for policy 0, policy_version 275322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:47,951][626795] Updated weights for policy 0, policy_version 275332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:48,976][24592] Fps is (10 sec: 41777.3, 60 sec: 41232.9, 300 sec: 41293.2). Total num frames: 2255560704. Throughput: 0: 10348.6. Samples: 313878144. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:48,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:49,874][626795] Updated weights for policy 0, policy_version 275342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:51,812][626795] Updated weights for policy 0, policy_version 275352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:53,732][626795] Updated weights for policy 0, policy_version 275362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.2, 300 sec: 41321.0). Total num frames: 2255773696. Throughput: 0: 10356.5. Samples: 313940934. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:53,980][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:55,725][626795] Updated weights for policy 0, policy_version 275372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:57,857][626795] Updated weights for policy 0, policy_version 275382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:58,976][24592] Fps is (10 sec: 40961.0, 60 sec: 41369.4, 300 sec: 41265.5). Total num frames: 2255970304. Throughput: 0: 10329.2. Samples: 313970676. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:31:58,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:31:59,826][626795] Updated weights for policy 0, policy_version 275392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:01,814][626795] Updated weights for policy 0, policy_version 275402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:03,885][626795] Updated weights for policy 0, policy_version 275412 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:03,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2256183296. Throughput: 0: 10319.5. Samples: 314032170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:03,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:05,875][626795] Updated weights for policy 0, policy_version 275422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:07,685][626795] Updated weights for policy 0, policy_version 275432 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:08,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2256388096. Throughput: 0: 10336.3. Samples: 314094858. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:08,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:09,692][626795] Updated weights for policy 0, policy_version 275442 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:11,648][626795] Updated weights for policy 0, policy_version 275452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:13,620][626795] Updated weights for policy 0, policy_version 275462 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2256592896. Throughput: 0: 10369.2. Samples: 314126052. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:13,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:15,692][626795] Updated weights for policy 0, policy_version 275472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:17,596][626795] Updated weights for policy 0, policy_version 275482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:18,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41369.4, 300 sec: 41321.0). Total num frames: 2256805888. Throughput: 0: 10373.7. Samples: 314188608. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:18,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:19,592][626795] Updated weights for policy 0, policy_version 275492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:21,529][626795] Updated weights for policy 0, policy_version 275502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:23,450][626795] Updated weights for policy 0, policy_version 275512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:23,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.4, 300 sec: 41321.0). Total num frames: 2257010688. Throughput: 0: 10376.0. Samples: 314251146. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:23,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:25,527][626795] Updated weights for policy 0, policy_version 275522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:27,557][626795] Updated weights for policy 0, policy_version 275532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:28,976][24592] Fps is (10 sec: 40957.4, 60 sec: 41369.2, 300 sec: 41293.1). Total num frames: 2257215488. Throughput: 0: 10379.5. Samples: 314281776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:28,978][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:29,520][626795] Updated weights for policy 0, policy_version 275542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:31,476][626795] Updated weights for policy 0, policy_version 275552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:33,527][626795] Updated weights for policy 0, policy_version 275562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.6, 300 sec: 41293.3). Total num frames: 2257420288. Throughput: 0: 10334.9. Samples: 314343210. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:33,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:35,389][626795] Updated weights for policy 0, policy_version 275572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:37,428][626795] Updated weights for policy 0, policy_version 275582 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:38,976][24592] Fps is (10 sec: 40962.7, 60 sec: 41369.4, 300 sec: 41293.2). Total num frames: 2257625088. Throughput: 0: 10311.9. Samples: 314404974. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:38,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:39,396][626795] Updated weights for policy 0, policy_version 275592 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:41,362][626795] Updated weights for policy 0, policy_version 275602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:43,395][626795] Updated weights for policy 0, policy_version 275612 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:43,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41233.0, 300 sec: 41293.2). Total num frames: 2257829888. Throughput: 0: 10346.1. Samples: 314436252. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:43,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:45,378][626795] Updated weights for policy 0, policy_version 275622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:47,423][626795] Updated weights for policy 0, policy_version 275632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:48,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41369.9, 300 sec: 41293.3). Total num frames: 2258042880. Throughput: 0: 10346.5. Samples: 314497764. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:48,977][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:49,384][626795] Updated weights for policy 0, policy_version 275642 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:51,306][626795] Updated weights for policy 0, policy_version 275652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:53,337][626795] Updated weights for policy 0, policy_version 275662 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:53,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41233.0, 300 sec: 41293.4). Total num frames: 2258247680. Throughput: 0: 10343.3. Samples: 314560308. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:53,976][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:55,349][626795] Updated weights for policy 0, policy_version 275672 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:57,250][626795] Updated weights for policy 0, policy_version 275682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:58,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41369.6, 300 sec: 41265.5). Total num frames: 2258452480. Throughput: 0: 10337.3. Samples: 314591232. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:32:58,976][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:32:59,221][626795] Updated weights for policy 0, policy_version 275692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:01,343][626795] Updated weights for policy 0, policy_version 275702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:03,348][626795] Updated weights for policy 0, policy_version 275712 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2258657280. Throughput: 0: 10292.8. Samples: 314651784. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:03,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000275715_2258657280.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:04,085][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000274507_2248761344.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:05,469][626795] Updated weights for policy 0, policy_version 275722 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:07,420][626795] Updated weights for policy 0, policy_version 275732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:08,976][24592] Fps is (10 sec: 40139.8, 60 sec: 41096.3, 300 sec: 41265.4). Total num frames: 2258853888. Throughput: 0: 10253.5. Samples: 314712558. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:08,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:09,465][626795] Updated weights for policy 0, policy_version 275742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:11,428][626795] Updated weights for policy 0, policy_version 275752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:13,367][626795] Updated weights for policy 0, policy_version 275762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:13,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2259058688. Throughput: 0: 10258.6. Samples: 314743404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:13,978][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:15,322][626795] Updated weights for policy 0, policy_version 275772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:17,307][626795] Updated weights for policy 0, policy_version 275782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:18,975][24592] Fps is (10 sec: 41780.5, 60 sec: 41096.6, 300 sec: 41265.5). Total num frames: 2259271680. Throughput: 0: 10275.8. Samples: 314805624. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:18,977][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:19,335][626795] Updated weights for policy 0, policy_version 275792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:21,318][626795] Updated weights for policy 0, policy_version 275802 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:23,190][626795] Updated weights for policy 0, policy_version 275812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2259476480. Throughput: 0: 10289.8. Samples: 314868012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:23,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:25,428][626795] Updated weights for policy 0, policy_version 275822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:27,279][626795] Updated weights for policy 0, policy_version 275832 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:28,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41097.1, 300 sec: 41237.7). Total num frames: 2259681280. Throughput: 0: 10254.3. Samples: 314897694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:28,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:29,310][626795] Updated weights for policy 0, policy_version 275842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:31,225][626795] Updated weights for policy 0, policy_version 275852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:33,200][626795] Updated weights for policy 0, policy_version 275862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:33,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41096.5, 300 sec: 41237.8). Total num frames: 2259886080. Throughput: 0: 10273.3. Samples: 314960064. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:33,976][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:35,290][626795] Updated weights for policy 0, policy_version 275872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:37,209][626795] Updated weights for policy 0, policy_version 275882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:38,976][24592] Fps is (10 sec: 40958.0, 60 sec: 41096.3, 300 sec: 41237.6). Total num frames: 2260090880. Throughput: 0: 10247.5. Samples: 315021450. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:38,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:39,179][626795] Updated weights for policy 0, policy_version 275892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:41,307][626795] Updated weights for policy 0, policy_version 275902 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:42,289][626772] Signal inference workers to stop experience collection... (4150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:42,290][626772] Signal inference workers to resume experience collection... (4150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:42,298][626795] InferenceWorker_p0-w0: stopping experience collection (4150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:42,300][626795] InferenceWorker_p0-w0: resuming experience collection (4150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:43,151][626795] Updated weights for policy 0, policy_version 275912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:43,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41233.2, 300 sec: 41265.5). Total num frames: 2260303872. Throughput: 0: 10246.0. Samples: 315052302. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:43,977][24592] Avg episode reward: [(0, '4.301')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:45,157][626795] Updated weights for policy 0, policy_version 275922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:47,173][626795] Updated weights for policy 0, policy_version 275932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:48,975][24592] Fps is (10 sec: 41781.1, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 2260508672. Throughput: 0: 10278.5. Samples: 315114318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:48,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:49,182][626795] Updated weights for policy 0, policy_version 275942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:51,222][626795] Updated weights for policy 0, policy_version 275952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:53,083][626795] Updated weights for policy 0, policy_version 275962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:53,976][24592] Fps is (10 sec: 40957.7, 60 sec: 41096.2, 300 sec: 41237.6). Total num frames: 2260713472. Throughput: 0: 10301.7. Samples: 315176136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:53,978][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:55,155][626795] Updated weights for policy 0, policy_version 275972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:57,100][626795] Updated weights for policy 0, policy_version 275982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:58,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41096.5, 300 sec: 41238.5). Total num frames: 2260918272. Throughput: 0: 10303.1. Samples: 315207048. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:33:58,978][24592] Avg episode reward: [(0, '4.291')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:33:59,073][626795] Updated weights for policy 0, policy_version 275992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:01,020][626795] Updated weights for policy 0, policy_version 276002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:03,152][626795] Updated weights for policy 0, policy_version 276012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:03,975][24592] Fps is (10 sec: 40962.1, 60 sec: 41096.5, 300 sec: 41210.5). Total num frames: 2261123072. Throughput: 0: 10295.5. Samples: 315268920. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:03,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:04,969][626795] Updated weights for policy 0, policy_version 276022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:07,073][626795] Updated weights for policy 0, policy_version 276032 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:08,977][24592] Fps is (10 sec: 40956.6, 60 sec: 41232.6, 300 sec: 41209.8). Total num frames: 2261327872. Throughput: 0: 10269.2. Samples: 315330138. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:08,978][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:09,084][626795] Updated weights for policy 0, policy_version 276042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:11,091][626795] Updated weights for policy 0, policy_version 276052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:13,081][626795] Updated weights for policy 0, policy_version 276062 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.0, 300 sec: 41209.9). Total num frames: 2261532672. Throughput: 0: 10286.5. Samples: 315360588. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:13,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:15,185][626795] Updated weights for policy 0, policy_version 276072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:17,088][626795] Updated weights for policy 0, policy_version 276082 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:18,977][24592] Fps is (10 sec: 40960.5, 60 sec: 41095.9, 300 sec: 41209.8). Total num frames: 2261737472. Throughput: 0: 10281.9. Samples: 315422760. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:18,978][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:19,036][626795] Updated weights for policy 0, policy_version 276092 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:21,077][626795] Updated weights for policy 0, policy_version 276102 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:22,976][626795] Updated weights for policy 0, policy_version 276112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:23,976][24592] Fps is (10 sec: 40957.8, 60 sec: 41096.2, 300 sec: 41182.1). Total num frames: 2261942272. Throughput: 0: 10285.6. Samples: 315484302. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:23,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:25,014][626795] Updated weights for policy 0, policy_version 276122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:26,953][626795] Updated weights for policy 0, policy_version 276132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:28,976][24592] Fps is (10 sec: 40963.1, 60 sec: 41096.4, 300 sec: 41182.3). Total num frames: 2262147072. Throughput: 0: 10287.8. Samples: 315515256. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:28,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:28,979][626795] Updated weights for policy 0, policy_version 276142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:30,946][626795] Updated weights for policy 0, policy_version 276152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:33,010][626795] Updated weights for policy 0, policy_version 276162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:33,975][24592] Fps is (10 sec: 41781.7, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2262360064. Throughput: 0: 10282.0. Samples: 315577008. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:33,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:34,948][626795] Updated weights for policy 0, policy_version 276172 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:37,021][626795] Updated weights for policy 0, policy_version 276182 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:38,933][626795] Updated weights for policy 0, policy_version 276192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:38,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41233.4, 300 sec: 41209.9). Total num frames: 2262564864. Throughput: 0: 10278.1. Samples: 315638646. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:38,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:40,943][626795] Updated weights for policy 0, policy_version 276202 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:42,994][626795] Updated weights for policy 0, policy_version 276212 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:43,976][24592] Fps is (10 sec: 40138.2, 60 sec: 40959.6, 300 sec: 41182.1). Total num frames: 2262761472. Throughput: 0: 10258.3. Samples: 315668676. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:43,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:44,998][626795] Updated weights for policy 0, policy_version 276222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:47,008][626795] Updated weights for policy 0, policy_version 276232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:48,978][24592] Fps is (10 sec: 40129.7, 60 sec: 40958.1, 300 sec: 41181.8). Total num frames: 2262966272. Throughput: 0: 10255.8. Samples: 315730458. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:48,979][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:49,053][626795] Updated weights for policy 0, policy_version 276242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:51,048][626795] Updated weights for policy 0, policy_version 276252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:52,984][626795] Updated weights for policy 0, policy_version 276262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:53,975][24592] Fps is (10 sec: 41781.7, 60 sec: 41096.9, 300 sec: 41209.9). Total num frames: 2263179264. Throughput: 0: 10266.9. Samples: 315792138. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:53,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:54,866][626795] Updated weights for policy 0, policy_version 276272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:56,957][626795] Updated weights for policy 0, policy_version 276282 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:58,976][24592] Fps is (10 sec: 40966.9, 60 sec: 40959.4, 300 sec: 41154.2). Total num frames: 2263375872. Throughput: 0: 10285.8. Samples: 315823458. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:34:58,978][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:34:58,986][626795] Updated weights for policy 0, policy_version 276292 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:01,002][626795] Updated weights for policy 0, policy_version 276302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:03,014][626795] Updated weights for policy 0, policy_version 276312 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:03,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41096.6, 300 sec: 41182.2). Total num frames: 2263588864. Throughput: 0: 10244.5. Samples: 315883752. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:03,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000276317_2263588864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:04,060][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000275110_2253701120.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:05,193][626795] Updated weights for policy 0, policy_version 276322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:07,055][626795] Updated weights for policy 0, policy_version 276332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:08,976][24592] Fps is (10 sec: 40963.6, 60 sec: 40960.6, 300 sec: 41154.4). Total num frames: 2263785472. Throughput: 0: 10236.9. Samples: 315944958. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:08,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:09,127][626795] Updated weights for policy 0, policy_version 276342 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:11,069][626795] Updated weights for policy 0, policy_version 276352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:13,092][626795] Updated weights for policy 0, policy_version 276362 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:13,976][24592] Fps is (10 sec: 39320.5, 60 sec: 40823.3, 300 sec: 41126.6). Total num frames: 2263982080. Throughput: 0: 10216.0. Samples: 315974976. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:13,977][24592] Avg episode reward: [(0, '4.944')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:15,068][626795] Updated weights for policy 0, policy_version 276372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:17,082][626795] Updated weights for policy 0, policy_version 276382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:18,976][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.5, 300 sec: 41154.4). Total num frames: 2264195072. Throughput: 0: 10212.9. Samples: 316036590. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:18,976][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:19,156][626795] Updated weights for policy 0, policy_version 276392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:21,196][626795] Updated weights for policy 0, policy_version 276402 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:23,165][626795] Updated weights for policy 0, policy_version 276412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:23,975][24592] Fps is (10 sec: 41780.2, 60 sec: 40960.4, 300 sec: 41182.2). Total num frames: 2264399872. Throughput: 0: 10207.2. Samples: 316097970. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:23,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:25,080][626795] Updated weights for policy 0, policy_version 276422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:27,019][626795] Updated weights for policy 0, policy_version 276432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:28,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40960.1, 300 sec: 41154.6). Total num frames: 2264604672. Throughput: 0: 10232.2. Samples: 316129116. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:28,976][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:29,017][626795] Updated weights for policy 0, policy_version 276442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:31,064][626795] Updated weights for policy 0, policy_version 276452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:32,903][626795] Updated weights for policy 0, policy_version 276462 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:33,975][24592] Fps is (10 sec: 41779.5, 60 sec: 40960.0, 300 sec: 41182.2). Total num frames: 2264817664. Throughput: 0: 10247.3. Samples: 316191558. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:33,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:34,963][626795] Updated weights for policy 0, policy_version 276472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:36,918][626795] Updated weights for policy 0, policy_version 276482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:38,972][626795] Updated weights for policy 0, policy_version 276492 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40960.0, 300 sec: 41182.9). Total num frames: 2265022464. Throughput: 0: 10261.1. Samples: 316253886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:38,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:40,958][626795] Updated weights for policy 0, policy_version 276502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:42,954][626795] Updated weights for policy 0, policy_version 276512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:43,976][24592] Fps is (10 sec: 40958.9, 60 sec: 41096.8, 300 sec: 41154.4). Total num frames: 2265227264. Throughput: 0: 10234.1. Samples: 316283982. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:43,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:44,921][626795] Updated weights for policy 0, policy_version 276522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:46,905][626795] Updated weights for policy 0, policy_version 276532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:48,917][626795] Updated weights for policy 0, policy_version 276542 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:48,978][24592] Fps is (10 sec: 40950.6, 60 sec: 41096.9, 300 sec: 41181.9). Total num frames: 2265432064. Throughput: 0: 10250.8. Samples: 316345062. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:48,978][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:51,014][626795] Updated weights for policy 0, policy_version 276552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:53,006][626795] Updated weights for policy 0, policy_version 276562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:53,975][24592] Fps is (10 sec: 40960.9, 60 sec: 40960.0, 300 sec: 41182.2). Total num frames: 2265636864. Throughput: 0: 10256.8. Samples: 316406514. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:53,976][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:54,984][626795] Updated weights for policy 0, policy_version 276572 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:56,901][626795] Updated weights for policy 0, policy_version 276582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:35:58,922][626795] Updated weights for policy 0, policy_version 276592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:58,975][24592] Fps is (10 sec: 40969.5, 60 sec: 41097.3, 300 sec: 41154.4). Total num frames: 2265841664. Throughput: 0: 10278.9. Samples: 316437522. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:35:58,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:00,875][626795] Updated weights for policy 0, policy_version 276602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:02,894][626795] Updated weights for policy 0, policy_version 276612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:03,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.0, 300 sec: 41154.4). Total num frames: 2266046464. Throughput: 0: 10291.1. Samples: 316499688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:03,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:04,869][626795] Updated weights for policy 0, policy_version 276622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:06,821][626795] Updated weights for policy 0, policy_version 276632 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:08,815][626795] Updated weights for policy 0, policy_version 276642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.2, 300 sec: 41182.2). Total num frames: 2266259456. Throughput: 0: 10322.9. Samples: 316562502. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:08,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:10,676][626795] Updated weights for policy 0, policy_version 276652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:12,756][626795] Updated weights for policy 0, policy_version 276662 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:13,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41369.7, 300 sec: 41154.4). Total num frames: 2266464256. Throughput: 0: 10322.6. Samples: 316593636. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:13,979][24592] Avg episode reward: [(0, '4.986')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:14,857][626795] Updated weights for policy 0, policy_version 276672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:16,717][626795] Updated weights for policy 0, policy_version 276682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:18,810][626795] Updated weights for policy 0, policy_version 276692 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:18,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41233.2, 300 sec: 41182.2). Total num frames: 2266669056. Throughput: 0: 10287.1. Samples: 316654476. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:18,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:20,682][626795] Updated weights for policy 0, policy_version 276702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:22,703][626795] Updated weights for policy 0, policy_version 276712 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:23,976][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.0, 300 sec: 41154.4). Total num frames: 2266873856. Throughput: 0: 10289.3. Samples: 316716906. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:23,978][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:24,705][626795] Updated weights for policy 0, policy_version 276722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:26,712][626795] Updated weights for policy 0, policy_version 276732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:28,582][626795] Updated weights for policy 0, policy_version 276742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:28,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41233.0, 300 sec: 41154.4). Total num frames: 2267078656. Throughput: 0: 10310.0. Samples: 316747932. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:28,976][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:30,561][626795] Updated weights for policy 0, policy_version 276752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:32,554][626795] Updated weights for policy 0, policy_version 276762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:33,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41233.1, 300 sec: 41182.2). Total num frames: 2267291648. Throughput: 0: 10336.3. Samples: 316810170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:33,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:34,618][626795] Updated weights for policy 0, policy_version 276772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:36,490][626795] Updated weights for policy 0, policy_version 276782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:38,441][626795] Updated weights for policy 0, policy_version 276792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:38,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41232.8, 300 sec: 41154.3). Total num frames: 2267496448. Throughput: 0: 10360.1. Samples: 316872720. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:38,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:40,481][626795] Updated weights for policy 0, policy_version 276802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:42,404][626795] Updated weights for policy 0, policy_version 276812 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:43,976][24592] Fps is (10 sec: 40956.0, 60 sec: 41232.6, 300 sec: 41154.3). Total num frames: 2267701248. Throughput: 0: 10355.9. Samples: 316903548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:43,979][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:44,420][626795] Updated weights for policy 0, policy_version 276822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:46,449][626795] Updated weights for policy 0, policy_version 276832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:48,488][626795] Updated weights for policy 0, policy_version 276842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:48,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41234.2, 300 sec: 41126.5). Total num frames: 2267906048. Throughput: 0: 10337.1. Samples: 316964862. Policy #0 lag: (min: 0.0, avg: 2.3, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:48,978][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:50,513][626795] Updated weights for policy 0, policy_version 276852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:52,486][626795] Updated weights for policy 0, policy_version 276862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:53,975][24592] Fps is (10 sec: 40963.9, 60 sec: 41233.1, 300 sec: 41154.4). Total num frames: 2268110848. Throughput: 0: 10302.0. Samples: 317026092. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:53,977][24592] Avg episode reward: [(0, '4.444')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:54,470][626795] Updated weights for policy 0, policy_version 276872 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:56,445][626795] Updated weights for policy 0, policy_version 276882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:36:58,485][626795] Updated weights for policy 0, policy_version 276892 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:58,976][24592] Fps is (10 sec: 40962.0, 60 sec: 41232.9, 300 sec: 41126.6). Total num frames: 2268315648. Throughput: 0: 10290.9. Samples: 317056728. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:36:58,978][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:00,581][626795] Updated weights for policy 0, policy_version 276902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:02,403][626795] Updated weights for policy 0, policy_version 276912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:03,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41154.4). Total num frames: 2268528640. Throughput: 0: 10326.8. Samples: 317119182. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:03,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000276920_2268528640.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000275715_2258657280.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:04,510][626795] Updated weights for policy 0, policy_version 276922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:06,361][626795] Updated weights for policy 0, policy_version 276932 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:08,343][626795] Updated weights for policy 0, policy_version 276942 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:08,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41232.9, 300 sec: 41154.4). Total num frames: 2268733440. Throughput: 0: 10315.9. Samples: 317181120. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:08,976][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:10,378][626795] Updated weights for policy 0, policy_version 276952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:12,301][626795] Updated weights for policy 0, policy_version 276962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:13,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.2, 300 sec: 41126.7). Total num frames: 2268938240. Throughput: 0: 10318.0. Samples: 317212242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:13,976][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:14,240][626795] Updated weights for policy 0, policy_version 276972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:16,247][626795] Updated weights for policy 0, policy_version 276982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:18,302][626795] Updated weights for policy 0, policy_version 276992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:18,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41233.0, 300 sec: 41126.6). Total num frames: 2269143040. Throughput: 0: 10307.6. Samples: 317274012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:18,977][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:20,377][626795] Updated weights for policy 0, policy_version 277002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:22,251][626795] Updated weights for policy 0, policy_version 277012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:23,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41233.1, 300 sec: 41126.7). Total num frames: 2269347840. Throughput: 0: 10286.2. Samples: 317335596. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:23,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:24,288][626795] Updated weights for policy 0, policy_version 277022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:26,302][626795] Updated weights for policy 0, policy_version 277032 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:28,280][626795] Updated weights for policy 0, policy_version 277042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:28,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41233.1, 300 sec: 41126.6). Total num frames: 2269552640. Throughput: 0: 10272.6. Samples: 317365806. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:28,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:30,253][626795] Updated weights for policy 0, policy_version 277052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:32,289][626795] Updated weights for policy 0, policy_version 277062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.5, 300 sec: 41126.6). Total num frames: 2269757440. Throughput: 0: 10278.8. Samples: 317427402. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:33,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:34,239][626795] Updated weights for policy 0, policy_version 277072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:36,217][626795] Updated weights for policy 0, policy_version 277082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:38,142][626795] Updated weights for policy 0, policy_version 277092 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.3, 300 sec: 41154.4). Total num frames: 2269970432. Throughput: 0: 10315.5. Samples: 317490288. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:38,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:40,049][626795] Updated weights for policy 0, policy_version 277102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:42,136][626795] Updated weights for policy 0, policy_version 277112 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:43,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41233.7, 300 sec: 41126.6). Total num frames: 2270175232. Throughput: 0: 10325.1. Samples: 317521356. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:43,978][24592] Avg episode reward: [(0, '4.915')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:44,013][626795] Updated weights for policy 0, policy_version 277122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:46,051][626795] Updated weights for policy 0, policy_version 277132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:47,945][626795] Updated weights for policy 0, policy_version 277142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:48,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.9, 300 sec: 41154.4). Total num frames: 2270388224. Throughput: 0: 10336.9. Samples: 317584344. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:48,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:50,023][626795] Updated weights for policy 0, policy_version 277152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:52,049][626795] Updated weights for policy 0, policy_version 277162 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:53,933][626795] Updated weights for policy 0, policy_version 277172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:53,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41369.5, 300 sec: 41154.4). Total num frames: 2270593024. Throughput: 0: 10330.0. Samples: 317645970. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:53,978][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:55,961][626795] Updated weights for policy 0, policy_version 277182 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:57,971][626795] Updated weights for policy 0, policy_version 277192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:58,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41369.7, 300 sec: 41154.4). Total num frames: 2270797824. Throughput: 0: 10309.9. Samples: 317676186. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:37:58,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:37:59,886][626795] Updated weights for policy 0, policy_version 277202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:01,865][626795] Updated weights for policy 0, policy_version 277212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:03,828][626795] Updated weights for policy 0, policy_version 277222 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:03,977][24592] Fps is (10 sec: 41773.9, 60 sec: 41368.7, 300 sec: 41209.8). Total num frames: 2271010816. Throughput: 0: 10331.0. Samples: 317738922. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:03,978][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:05,748][626795] Updated weights for policy 0, policy_version 277232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:07,751][626795] Updated weights for policy 0, policy_version 277242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:08,976][24592] Fps is (10 sec: 40956.5, 60 sec: 41232.6, 300 sec: 41182.0). Total num frames: 2271207424. Throughput: 0: 10357.0. Samples: 317801670. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:08,977][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:09,738][626795] Updated weights for policy 0, policy_version 277252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:11,699][626795] Updated weights for policy 0, policy_version 277262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:13,550][626795] Updated weights for policy 0, policy_version 277272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:13,975][24592] Fps is (10 sec: 40965.8, 60 sec: 41369.6, 300 sec: 41182.2). Total num frames: 2271420416. Throughput: 0: 10378.3. Samples: 317832828. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:13,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:15,619][626795] Updated weights for policy 0, policy_version 277282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:17,582][626795] Updated weights for policy 0, policy_version 277292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:18,976][24592] Fps is (10 sec: 42600.6, 60 sec: 41506.0, 300 sec: 41209.9). Total num frames: 2271633408. Throughput: 0: 10403.7. Samples: 317895570. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:18,976][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:19,636][626795] Updated weights for policy 0, policy_version 277302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:21,617][626795] Updated weights for policy 0, policy_version 277312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:23,437][626795] Updated weights for policy 0, policy_version 277322 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:23,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41506.0, 300 sec: 41209.9). Total num frames: 2271838208. Throughput: 0: 10378.2. Samples: 317957310. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:23,976][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:25,655][626795] Updated weights for policy 0, policy_version 277332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:27,649][626795] Updated weights for policy 0, policy_version 277342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:28,975][24592] Fps is (10 sec: 40142.0, 60 sec: 41369.6, 300 sec: 41182.2). Total num frames: 2272034816. Throughput: 0: 10349.5. Samples: 317987082. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:28,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:29,614][626795] Updated weights for policy 0, policy_version 277352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:31,618][626795] Updated weights for policy 0, policy_version 277362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:33,579][626795] Updated weights for policy 0, policy_version 277372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:33,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41506.2, 300 sec: 41210.0). Total num frames: 2272247808. Throughput: 0: 10329.1. Samples: 318049152. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:33,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:35,574][626795] Updated weights for policy 0, policy_version 277382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:37,542][626795] Updated weights for policy 0, policy_version 277392 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 41182.1). Total num frames: 2272452608. Throughput: 0: 10340.1. Samples: 318111276. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:38,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:39,555][626795] Updated weights for policy 0, policy_version 277402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:41,533][626795] Updated weights for policy 0, policy_version 277412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:43,422][626795] Updated weights for policy 0, policy_version 277422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:43,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41369.6, 300 sec: 41182.2). Total num frames: 2272657408. Throughput: 0: 10362.9. Samples: 318142518. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:43,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:45,435][626795] Updated weights for policy 0, policy_version 277432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:47,368][626795] Updated weights for policy 0, policy_version 277442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:48,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41369.7, 300 sec: 41210.0). Total num frames: 2272870400. Throughput: 0: 10357.9. Samples: 318205014. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:48,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:49,388][626795] Updated weights for policy 0, policy_version 277452 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:51,369][626795] Updated weights for policy 0, policy_version 277462 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:53,328][626795] Updated weights for policy 0, policy_version 277472 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:53,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41369.5, 300 sec: 41209.9). Total num frames: 2273075200. Throughput: 0: 10345.3. Samples: 318267204. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:53,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:55,275][626795] Updated weights for policy 0, policy_version 277482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:57,326][626795] Updated weights for policy 0, policy_version 277492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:58,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2273280000. Throughput: 0: 10320.7. Samples: 318297258. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:38:58,977][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:38:59,363][626795] Updated weights for policy 0, policy_version 277502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:01,318][626795] Updated weights for policy 0, policy_version 277512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:03,254][626795] Updated weights for policy 0, policy_version 277522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:03,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41234.0, 300 sec: 41210.1). Total num frames: 2273484800. Throughput: 0: 10313.7. Samples: 318359682. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:03,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:03,993][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000277526_2273492992.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:04,119][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000276317_2263588864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:05,275][626795] Updated weights for policy 0, policy_version 277532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:07,294][626795] Updated weights for policy 0, policy_version 277542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.7, 300 sec: 41237.7). Total num frames: 2273697792. Throughput: 0: 10322.7. Samples: 318421830. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:08,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:09,177][626795] Updated weights for policy 0, policy_version 277552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:11,175][626795] Updated weights for policy 0, policy_version 277562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:13,114][626795] Updated weights for policy 0, policy_version 277572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41237.8). Total num frames: 2273902592. Throughput: 0: 10357.6. Samples: 318453174. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:13,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:15,052][626795] Updated weights for policy 0, policy_version 277582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:17,002][626795] Updated weights for policy 0, policy_version 277592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:18,976][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.3, 300 sec: 41237.8). Total num frames: 2274107392. Throughput: 0: 10361.5. Samples: 318515418. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:18,979][24592] Avg episode reward: [(0, '4.332')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:19,017][626795] Updated weights for policy 0, policy_version 277602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:20,905][626795] Updated weights for policy 0, policy_version 277612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:22,931][626795] Updated weights for policy 0, policy_version 277622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.3, 300 sec: 41293.3). Total num frames: 2274328576. Throughput: 0: 10391.5. Samples: 318578892. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:23,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:24,966][626795] Updated weights for policy 0, policy_version 277632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:26,869][626795] Updated weights for policy 0, policy_version 277642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:28,846][626795] Updated weights for policy 0, policy_version 277652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:28,976][24592] Fps is (10 sec: 42595.9, 60 sec: 41642.3, 300 sec: 41265.4). Total num frames: 2274533376. Throughput: 0: 10380.3. Samples: 318609636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:28,979][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:30,910][626795] Updated weights for policy 0, policy_version 277662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:32,793][626795] Updated weights for policy 0, policy_version 277672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41506.1, 300 sec: 41265.5). Total num frames: 2274738176. Throughput: 0: 10363.9. Samples: 318671388. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:33,976][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:34,741][626795] Updated weights for policy 0, policy_version 277682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:36,777][626795] Updated weights for policy 0, policy_version 277692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:38,781][626795] Updated weights for policy 0, policy_version 277702 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:38,975][24592] Fps is (10 sec: 40962.2, 60 sec: 41506.1, 300 sec: 41293.3). Total num frames: 2274942976. Throughput: 0: 10371.5. Samples: 318733920. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:38,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:40,656][626795] Updated weights for policy 0, policy_version 277712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:42,669][626795] Updated weights for policy 0, policy_version 277722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:43,976][24592] Fps is (10 sec: 40958.3, 60 sec: 41505.8, 300 sec: 41293.6). Total num frames: 2275147776. Throughput: 0: 10397.5. Samples: 318765150. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:43,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:44,630][626795] Updated weights for policy 0, policy_version 277732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:46,593][626795] Updated weights for policy 0, policy_version 277742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:48,652][626795] Updated weights for policy 0, policy_version 277752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:48,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41506.1, 300 sec: 41293.2). Total num frames: 2275360768. Throughput: 0: 10403.6. Samples: 318827844. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:48,977][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:50,484][626795] Updated weights for policy 0, policy_version 277762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:52,411][626795] Updated weights for policy 0, policy_version 277772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:53,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41506.2, 300 sec: 41321.1). Total num frames: 2275565568. Throughput: 0: 10414.4. Samples: 318890478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:53,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:54,436][626795] Updated weights for policy 0, policy_version 277782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:56,377][626795] Updated weights for policy 0, policy_version 277792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:39:58,348][626795] Updated weights for policy 0, policy_version 277802 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:58,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41642.6, 300 sec: 41321.0). Total num frames: 2275778560. Throughput: 0: 10404.2. Samples: 318921366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:39:58,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:00,364][626795] Updated weights for policy 0, policy_version 277812 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:02,409][626795] Updated weights for policy 0, policy_version 277822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:03,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41642.4, 300 sec: 41348.8). Total num frames: 2275983360. Throughput: 0: 10392.1. Samples: 318983064. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:03,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:04,388][626795] Updated weights for policy 0, policy_version 277832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:06,360][626795] Updated weights for policy 0, policy_version 277842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:08,235][626795] Updated weights for policy 0, policy_version 277852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:08,977][24592] Fps is (10 sec: 40955.8, 60 sec: 41505.3, 300 sec: 41376.4). Total num frames: 2276188160. Throughput: 0: 10370.0. Samples: 319045554. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:08,978][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:10,293][626795] Updated weights for policy 0, policy_version 277862 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:12,176][626795] Updated weights for policy 0, policy_version 277872 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:13,982][24592] Fps is (10 sec: 41754.2, 60 sec: 41638.3, 300 sec: 41375.7). Total num frames: 2276401152. Throughput: 0: 10375.2. Samples: 319076580. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:13,983][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:14,220][626795] Updated weights for policy 0, policy_version 277882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:16,111][626795] Updated weights for policy 0, policy_version 277892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:18,080][626795] Updated weights for policy 0, policy_version 277902 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:18,975][24592] Fps is (10 sec: 41784.0, 60 sec: 41642.7, 300 sec: 41376.5). Total num frames: 2276605952. Throughput: 0: 10402.0. Samples: 319139478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:18,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:20,014][626795] Updated weights for policy 0, policy_version 277912 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:22,036][626795] Updated weights for policy 0, policy_version 277922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:23,879][626795] Updated weights for policy 0, policy_version 277932 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:23,977][24592] Fps is (10 sec: 41800.9, 60 sec: 41505.3, 300 sec: 41404.2). Total num frames: 2276818944. Throughput: 0: 10418.4. Samples: 319202760. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:23,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:25,871][626795] Updated weights for policy 0, policy_version 277942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:27,858][626795] Updated weights for policy 0, policy_version 277952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:28,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41643.1, 300 sec: 41404.3). Total num frames: 2277031936. Throughput: 0: 10417.7. Samples: 319233942. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:28,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:29,867][626795] Updated weights for policy 0, policy_version 277962 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:31,840][626795] Updated weights for policy 0, policy_version 277972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:33,730][626795] Updated weights for policy 0, policy_version 277982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:33,975][24592] Fps is (10 sec: 41783.5, 60 sec: 41642.6, 300 sec: 41404.3). Total num frames: 2277236736. Throughput: 0: 10410.5. Samples: 319296318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:33,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:35,797][626795] Updated weights for policy 0, policy_version 277992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:37,760][626795] Updated weights for policy 0, policy_version 278002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:38,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41432.1). Total num frames: 2277449728. Throughput: 0: 10408.3. Samples: 319358850. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:38,979][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:39,655][626795] Updated weights for policy 0, policy_version 278012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:41,737][626795] Updated weights for policy 0, policy_version 278022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:43,552][626795] Updated weights for policy 0, policy_version 278032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:43,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41643.0, 300 sec: 41404.6). Total num frames: 2277646336. Throughput: 0: 10407.0. Samples: 319389678. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:43,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:45,583][626795] Updated weights for policy 0, policy_version 278042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:47,456][626795] Updated weights for policy 0, policy_version 278052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:48,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41642.6, 300 sec: 41432.1). Total num frames: 2277859328. Throughput: 0: 10444.6. Samples: 319453068. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:48,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:49,513][626795] Updated weights for policy 0, policy_version 278062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:51,517][626795] Updated weights for policy 0, policy_version 278072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:53,476][626795] Updated weights for policy 0, policy_version 278082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:53,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.7, 300 sec: 41432.1). Total num frames: 2278064128. Throughput: 0: 10437.2. Samples: 319515216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:53,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:55,382][626795] Updated weights for policy 0, policy_version 278092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:57,337][626795] Updated weights for policy 0, policy_version 278102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.8, 300 sec: 41459.9). Total num frames: 2278277120. Throughput: 0: 10426.3. Samples: 319545696. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:40:58,978][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:40:59,380][626795] Updated weights for policy 0, policy_version 278112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:01,401][626795] Updated weights for policy 0, policy_version 278122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:03,258][626795] Updated weights for policy 0, policy_version 278132 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:03,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.8, 300 sec: 41432.1). Total num frames: 2278481920. Throughput: 0: 10422.0. Samples: 319608468. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:03,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000278135_2278481920.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000276920_2268528640.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:05,356][626795] Updated weights for policy 0, policy_version 278142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:07,334][626795] Updated weights for policy 0, policy_version 278152 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:08,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41643.5, 300 sec: 41432.1). Total num frames: 2278686720. Throughput: 0: 10368.9. Samples: 319669350. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:08,976][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:09,420][626795] Updated weights for policy 0, policy_version 278162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:11,448][626795] Updated weights for policy 0, policy_version 278172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:13,304][626795] Updated weights for policy 0, policy_version 278182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:13,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41510.5, 300 sec: 41432.1). Total num frames: 2278891520. Throughput: 0: 10362.9. Samples: 319700274. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:13,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:15,296][626795] Updated weights for policy 0, policy_version 278192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:17,237][626795] Updated weights for policy 0, policy_version 278202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:18,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2279096320. Throughput: 0: 10362.5. Samples: 319762632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:18,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:19,207][626795] Updated weights for policy 0, policy_version 278212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:21,218][626795] Updated weights for policy 0, policy_version 278222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:23,105][626795] Updated weights for policy 0, policy_version 278232 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:23,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.9, 300 sec: 41459.9). Total num frames: 2279309312. Throughput: 0: 10364.7. Samples: 319825260. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:23,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:25,040][626795] Updated weights for policy 0, policy_version 278242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:27,031][626795] Updated weights for policy 0, policy_version 278252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:28,976][24592] Fps is (10 sec: 41776.5, 60 sec: 41369.0, 300 sec: 41432.0). Total num frames: 2279514112. Throughput: 0: 10373.0. Samples: 319856472. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:28,979][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:29,070][626795] Updated weights for policy 0, policy_version 278262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:30,980][626795] Updated weights for policy 0, policy_version 278272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:32,953][626795] Updated weights for policy 0, policy_version 278282 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41506.2, 300 sec: 41459.9). Total num frames: 2279727104. Throughput: 0: 10353.2. Samples: 319918962. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:33,978][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:34,896][626795] Updated weights for policy 0, policy_version 278292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:37,063][626795] Updated weights for policy 0, policy_version 278302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:38,896][626795] Updated weights for policy 0, policy_version 278312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:38,976][24592] Fps is (10 sec: 41781.7, 60 sec: 41369.5, 300 sec: 41460.0). Total num frames: 2279931904. Throughput: 0: 10339.2. Samples: 319980480. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:38,978][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:40,882][626795] Updated weights for policy 0, policy_version 278322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:42,928][626795] Updated weights for policy 0, policy_version 278332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:43,976][24592] Fps is (10 sec: 40958.6, 60 sec: 41505.9, 300 sec: 41459.9). Total num frames: 2280136704. Throughput: 0: 10350.4. Samples: 320011470. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:43,976][24592] Avg episode reward: [(0, '4.386')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:44,860][626795] Updated weights for policy 0, policy_version 278342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:46,809][626795] Updated weights for policy 0, policy_version 278352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:48,886][626795] Updated weights for policy 0, policy_version 278362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:48,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2280341504. Throughput: 0: 10341.2. Samples: 320073822. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:48,978][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:50,914][626795] Updated weights for policy 0, policy_version 278372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:52,804][626795] Updated weights for policy 0, policy_version 278382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:53,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41369.7, 300 sec: 41459.9). Total num frames: 2280546304. Throughput: 0: 10363.3. Samples: 320135700. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:53,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:54,907][626795] Updated weights for policy 0, policy_version 278392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:56,686][626795] Updated weights for policy 0, policy_version 278402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:41:58,759][626795] Updated weights for policy 0, policy_version 278412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.1, 300 sec: 41432.1). Total num frames: 2280751104. Throughput: 0: 10363.1. Samples: 320166612. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:41:58,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:00,747][626795] Updated weights for policy 0, policy_version 278422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:02,728][626795] Updated weights for policy 0, policy_version 278432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.7, 300 sec: 41459.9). Total num frames: 2280964096. Throughput: 0: 10357.0. Samples: 320228694. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:03,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:04,662][626795] Updated weights for policy 0, policy_version 278442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:06,706][626795] Updated weights for policy 0, policy_version 278452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:08,700][626795] Updated weights for policy 0, policy_version 278462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.6, 300 sec: 41459.8). Total num frames: 2281168896. Throughput: 0: 10341.9. Samples: 320290644. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:08,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:10,788][626795] Updated weights for policy 0, policy_version 278472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:12,659][626795] Updated weights for policy 0, policy_version 278482 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:13,976][24592] Fps is (10 sec: 40957.2, 60 sec: 41369.2, 300 sec: 41459.8). Total num frames: 2281373696. Throughput: 0: 10318.4. Samples: 320320800. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:13,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:14,806][626795] Updated weights for policy 0, policy_version 278492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:16,726][626795] Updated weights for policy 0, policy_version 278502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:18,774][626795] Updated weights for policy 0, policy_version 278512 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:18,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.7, 300 sec: 41459.9). Total num frames: 2281578496. Throughput: 0: 10294.7. Samples: 320382222. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:18,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:20,682][626795] Updated weights for policy 0, policy_version 278522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:22,759][626795] Updated weights for policy 0, policy_version 278532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:23,975][24592] Fps is (10 sec: 40962.6, 60 sec: 41233.0, 300 sec: 41459.8). Total num frames: 2281783296. Throughput: 0: 10315.0. Samples: 320444652. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:23,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:24,636][626795] Updated weights for policy 0, policy_version 278542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:26,607][626795] Updated weights for policy 0, policy_version 278552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:28,551][626795] Updated weights for policy 0, policy_version 278562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:28,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41370.1, 300 sec: 41487.6). Total num frames: 2281996288. Throughput: 0: 10311.8. Samples: 320475498. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:28,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:30,587][626795] Updated weights for policy 0, policy_version 278572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:32,583][626795] Updated weights for policy 0, policy_version 278582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:33,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.1, 300 sec: 41459.9). Total num frames: 2282201088. Throughput: 0: 10309.2. Samples: 320537736. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:33,976][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:34,601][626795] Updated weights for policy 0, policy_version 278592 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:36,574][626795] Updated weights for policy 0, policy_version 278602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:38,452][626795] Updated weights for policy 0, policy_version 278612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:38,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2282414080. Throughput: 0: 10326.8. Samples: 320600406. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:38,977][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:40,473][626795] Updated weights for policy 0, policy_version 278622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:42,498][626795] Updated weights for policy 0, policy_version 278632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:43,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.2, 300 sec: 41432.1). Total num frames: 2282610688. Throughput: 0: 10303.8. Samples: 320630286. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:43,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:44,554][626795] Updated weights for policy 0, policy_version 278642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:46,454][626795] Updated weights for policy 0, policy_version 278652 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:48,462][626795] Updated weights for policy 0, policy_version 278662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:48,976][24592] Fps is (10 sec: 40139.0, 60 sec: 41232.6, 300 sec: 41432.0). Total num frames: 2282815488. Throughput: 0: 10296.7. Samples: 320692050. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:48,978][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:50,563][626795] Updated weights for policy 0, policy_version 278672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:52,480][626795] Updated weights for policy 0, policy_version 278682 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:53,976][24592] Fps is (10 sec: 40957.1, 60 sec: 41232.5, 300 sec: 41432.0). Total num frames: 2283020288. Throughput: 0: 10288.6. Samples: 320753640. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:53,977][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:54,469][626795] Updated weights for policy 0, policy_version 278692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:55,218][626772] Signal inference workers to stop experience collection... (4200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:55,220][626772] Signal inference workers to resume experience collection... (4200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:55,242][626795] InferenceWorker_p0-w0: stopping experience collection (4200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:55,251][626795] InferenceWorker_p0-w0: resuming experience collection (4200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:56,482][626795] Updated weights for policy 0, policy_version 278702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:42:58,450][626795] Updated weights for policy 0, policy_version 278712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:58,975][24592] Fps is (10 sec: 40962.3, 60 sec: 41233.0, 300 sec: 41404.5). Total num frames: 2283225088. Throughput: 0: 10286.7. Samples: 320783694. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:42:58,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:00,590][626795] Updated weights for policy 0, policy_version 278722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:02,416][626795] Updated weights for policy 0, policy_version 278732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:03,975][24592] Fps is (10 sec: 40963.0, 60 sec: 41096.5, 300 sec: 41432.2). Total num frames: 2283429888. Throughput: 0: 10304.9. Samples: 320845944. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:03,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000278739_2283429888.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:04,123][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000277526_2273492992.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:04,506][626795] Updated weights for policy 0, policy_version 278742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:06,450][626795] Updated weights for policy 0, policy_version 278752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:08,415][626795] Updated weights for policy 0, policy_version 278762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.1, 300 sec: 41432.1). Total num frames: 2283642880. Throughput: 0: 10296.0. Samples: 320907972. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:08,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:10,319][626795] Updated weights for policy 0, policy_version 278772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:12,439][626795] Updated weights for policy 0, policy_version 278782 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.9, 300 sec: 41376.6). Total num frames: 2283839488. Throughput: 0: 10292.8. Samples: 320938674. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:13,977][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:14,471][626795] Updated weights for policy 0, policy_version 278792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:16,439][626795] Updated weights for policy 0, policy_version 278802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:18,422][626795] Updated weights for policy 0, policy_version 278812 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:18,975][24592] Fps is (10 sec: 40140.5, 60 sec: 41096.5, 300 sec: 41376.6). Total num frames: 2284044288. Throughput: 0: 10275.8. Samples: 321000150. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:18,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:20,464][626795] Updated weights for policy 0, policy_version 278822 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:22,411][626795] Updated weights for policy 0, policy_version 278832 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:23,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41233.1, 300 sec: 41432.1). Total num frames: 2284257280. Throughput: 0: 10243.6. Samples: 321061368. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:23,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:24,381][626795] Updated weights for policy 0, policy_version 278842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:26,398][626795] Updated weights for policy 0, policy_version 278852 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:28,308][626795] Updated weights for policy 0, policy_version 278862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.5, 300 sec: 41404.3). Total num frames: 2284462080. Throughput: 0: 10275.1. Samples: 321092664. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:28,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:30,257][626795] Updated weights for policy 0, policy_version 278872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:32,311][626795] Updated weights for policy 0, policy_version 278882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:33,976][24592] Fps is (10 sec: 40959.1, 60 sec: 41096.4, 300 sec: 41404.3). Total num frames: 2284666880. Throughput: 0: 10279.9. Samples: 321154644. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:33,978][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:34,316][626795] Updated weights for policy 0, policy_version 278892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:36,219][626795] Updated weights for policy 0, policy_version 278902 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:38,184][626795] Updated weights for policy 0, policy_version 278912 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:38,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.0, 300 sec: 41404.3). Total num frames: 2284871680. Throughput: 0: 10300.6. Samples: 321217158. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:38,977][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:40,208][626795] Updated weights for policy 0, policy_version 278922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:42,132][626795] Updated weights for policy 0, policy_version 278932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:43,977][24592] Fps is (10 sec: 41775.3, 60 sec: 41232.3, 300 sec: 41404.1). Total num frames: 2285084672. Throughput: 0: 10327.7. Samples: 321248454. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:43,979][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:44,179][626795] Updated weights for policy 0, policy_version 278942 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:46,174][626795] Updated weights for policy 0, policy_version 278952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:48,157][626795] Updated weights for policy 0, policy_version 278962 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.5, 300 sec: 41404.3). Total num frames: 2285289472. Throughput: 0: 10306.7. Samples: 321309744. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:48,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:50,249][626795] Updated weights for policy 0, policy_version 278972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:52,137][626795] Updated weights for policy 0, policy_version 278982 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:53,975][24592] Fps is (10 sec: 40964.6, 60 sec: 41233.6, 300 sec: 41404.3). Total num frames: 2285494272. Throughput: 0: 10301.9. Samples: 321371556. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:53,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:54,139][626795] Updated weights for policy 0, policy_version 278992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:56,103][626795] Updated weights for policy 0, policy_version 279002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:43:58,107][626795] Updated weights for policy 0, policy_version 279012 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:58,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.0, 300 sec: 41404.3). Total num frames: 2285699072. Throughput: 0: 10295.7. Samples: 321401982. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:43:58,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:00,143][626795] Updated weights for policy 0, policy_version 279022 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:02,072][626795] Updated weights for policy 0, policy_version 279032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:03,967][626795] Updated weights for policy 0, policy_version 279042 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:03,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.6, 300 sec: 41404.3). Total num frames: 2285912064. Throughput: 0: 10308.9. Samples: 321464052. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:03,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:05,962][626795] Updated weights for policy 0, policy_version 279052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:07,853][626795] Updated weights for policy 0, policy_version 279062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:08,976][24592] Fps is (10 sec: 41775.8, 60 sec: 41232.4, 300 sec: 41404.2). Total num frames: 2286116864. Throughput: 0: 10355.5. Samples: 321527376. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:08,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:09,931][626795] Updated weights for policy 0, policy_version 279072 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:11,830][626795] Updated weights for policy 0, policy_version 279082 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:13,851][626795] Updated weights for policy 0, policy_version 279092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:13,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41369.7, 300 sec: 41404.3). Total num frames: 2286321664. Throughput: 0: 10356.2. Samples: 321558690. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:13,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:15,784][626795] Updated weights for policy 0, policy_version 279102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:17,762][626795] Updated weights for policy 0, policy_version 279112 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:18,975][24592] Fps is (10 sec: 40963.7, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2286526464. Throughput: 0: 10355.8. Samples: 321620652. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:18,977][24592] Avg episode reward: [(0, '4.907')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:19,846][626795] Updated weights for policy 0, policy_version 279122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:21,777][626795] Updated weights for policy 0, policy_version 279132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:23,841][626795] Updated weights for policy 0, policy_version 279142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:23,981][24592] Fps is (10 sec: 40936.8, 60 sec: 41229.2, 300 sec: 41348.1). Total num frames: 2286731264. Throughput: 0: 10322.7. Samples: 321681738. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:23,984][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:25,741][626795] Updated weights for policy 0, policy_version 279152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:27,763][626795] Updated weights for policy 0, policy_version 279162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:28,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.7, 300 sec: 41376.5). Total num frames: 2286944256. Throughput: 0: 10325.3. Samples: 321713082. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:28,978][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:29,753][626795] Updated weights for policy 0, policy_version 279172 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:31,650][626795] Updated weights for policy 0, policy_version 279182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:33,700][626795] Updated weights for policy 0, policy_version 279192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:33,976][24592] Fps is (10 sec: 42619.8, 60 sec: 41505.8, 300 sec: 41404.2). Total num frames: 2287157248. Throughput: 0: 10349.6. Samples: 321775482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:33,977][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:35,627][626795] Updated weights for policy 0, policy_version 279202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:37,630][626795] Updated weights for policy 0, policy_version 279212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:38,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41505.9, 300 sec: 41404.3). Total num frames: 2287362048. Throughput: 0: 10373.2. Samples: 321838356. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:38,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:39,547][626795] Updated weights for policy 0, policy_version 279222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:41,538][626795] Updated weights for policy 0, policy_version 279232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:43,414][626795] Updated weights for policy 0, policy_version 279242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:43,975][24592] Fps is (10 sec: 41782.0, 60 sec: 41506.9, 300 sec: 41404.3). Total num frames: 2287575040. Throughput: 0: 10391.2. Samples: 321869586. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:43,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:45,338][626795] Updated weights for policy 0, policy_version 279252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:47,303][626795] Updated weights for policy 0, policy_version 279262 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:48,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41506.1, 300 sec: 41404.3). Total num frames: 2287779840. Throughput: 0: 10403.1. Samples: 321932190. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:48,978][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:49,352][626795] Updated weights for policy 0, policy_version 279272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:51,331][626795] Updated weights for policy 0, policy_version 279282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:53,392][626795] Updated weights for policy 0, policy_version 279292 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:53,976][24592] Fps is (10 sec: 40137.2, 60 sec: 41369.0, 300 sec: 41348.7). Total num frames: 2287976448. Throughput: 0: 10373.6. Samples: 321994188. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:53,980][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:55,421][626795] Updated weights for policy 0, policy_version 279302 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:57,346][626795] Updated weights for policy 0, policy_version 279312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41506.2, 300 sec: 41376.6). Total num frames: 2288189440. Throughput: 0: 10347.7. Samples: 322024338. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:44:58,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:44:59,361][626795] Updated weights for policy 0, policy_version 279322 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:01,364][626795] Updated weights for policy 0, policy_version 279332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:03,257][626795] Updated weights for policy 0, policy_version 279342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:03,975][24592] Fps is (10 sec: 41782.8, 60 sec: 41369.7, 300 sec: 41376.7). Total num frames: 2288394240. Throughput: 0: 10356.7. Samples: 322086702. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:03,976][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000279345_2288394240.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:04,083][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000278135_2278481920.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:05,209][626795] Updated weights for policy 0, policy_version 279352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:07,238][626795] Updated weights for policy 0, policy_version 279362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:08,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41370.3, 300 sec: 41349.7). Total num frames: 2288599040. Throughput: 0: 10370.8. Samples: 322148364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:08,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:09,276][626795] Updated weights for policy 0, policy_version 279372 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:11,291][626795] Updated weights for policy 0, policy_version 279382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:13,155][626795] Updated weights for policy 0, policy_version 279392 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.2, 300 sec: 41376.6). Total num frames: 2288812032. Throughput: 0: 10361.5. Samples: 322179348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:13,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:15,154][626795] Updated weights for policy 0, policy_version 279402 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:17,122][626795] Updated weights for policy 0, policy_version 279412 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:18,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41505.9, 300 sec: 41348.9). Total num frames: 2289016832. Throughput: 0: 10367.7. Samples: 322242024. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:18,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:19,057][626795] Updated weights for policy 0, policy_version 279422 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:21,120][626795] Updated weights for policy 0, policy_version 279432 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:23,112][626795] Updated weights for policy 0, policy_version 279442 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:23,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41510.0, 300 sec: 41321.0). Total num frames: 2289221632. Throughput: 0: 10336.9. Samples: 322303512. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:23,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:25,171][626795] Updated weights for policy 0, policy_version 279452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:27,030][626795] Updated weights for policy 0, policy_version 279462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:28,975][24592] Fps is (10 sec: 40142.7, 60 sec: 41233.1, 300 sec: 41293.3). Total num frames: 2289418240. Throughput: 0: 10318.4. Samples: 322333914. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:28,976][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:29,172][626795] Updated weights for policy 0, policy_version 279472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:31,120][626795] Updated weights for policy 0, policy_version 279482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:33,146][626795] Updated weights for policy 0, policy_version 279492 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41233.5, 300 sec: 41293.2). Total num frames: 2289631232. Throughput: 0: 10301.7. Samples: 322395768. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:33,977][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:35,092][626795] Updated weights for policy 0, policy_version 279502 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:37,018][626795] Updated weights for policy 0, policy_version 279512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:38,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.4, 300 sec: 41321.0). Total num frames: 2289836032. Throughput: 0: 10310.6. Samples: 322458156. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:38,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:38,991][626795] Updated weights for policy 0, policy_version 279522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:40,985][626795] Updated weights for policy 0, policy_version 279532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:42,943][626795] Updated weights for policy 0, policy_version 279542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:43,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41232.9, 300 sec: 41321.0). Total num frames: 2290049024. Throughput: 0: 10329.0. Samples: 322489146. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:43,978][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:44,964][626795] Updated weights for policy 0, policy_version 279552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:46,863][626795] Updated weights for policy 0, policy_version 279562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:48,805][626795] Updated weights for policy 0, policy_version 279572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:48,981][24592] Fps is (10 sec: 41754.6, 60 sec: 41229.0, 300 sec: 41320.2). Total num frames: 2290253824. Throughput: 0: 10340.5. Samples: 322552086. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:48,984][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:50,901][626795] Updated weights for policy 0, policy_version 279582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:52,779][626795] Updated weights for policy 0, policy_version 279592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:53,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41369.7, 300 sec: 41293.1). Total num frames: 2290458624. Throughput: 0: 10335.4. Samples: 322613466. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:53,979][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:54,823][626795] Updated weights for policy 0, policy_version 279602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:56,912][626795] Updated weights for policy 0, policy_version 279612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:45:58,847][626795] Updated weights for policy 0, policy_version 279622 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:58,975][24592] Fps is (10 sec: 40984.3, 60 sec: 41233.1, 300 sec: 41293.3). Total num frames: 2290663424. Throughput: 0: 10316.8. Samples: 322643604. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:45:58,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:00,847][626795] Updated weights for policy 0, policy_version 279632 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:02,813][626795] Updated weights for policy 0, policy_version 279642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:03,978][24592] Fps is (10 sec: 40955.1, 60 sec: 41231.8, 300 sec: 41293.0). Total num frames: 2290868224. Throughput: 0: 10304.2. Samples: 322705728. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:03,980][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:04,842][626795] Updated weights for policy 0, policy_version 279652 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:06,800][626795] Updated weights for policy 0, policy_version 279662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:08,806][626795] Updated weights for policy 0, policy_version 279672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2291081216. Throughput: 0: 10318.7. Samples: 322767852. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:08,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:10,771][626795] Updated weights for policy 0, policy_version 279682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:12,797][626795] Updated weights for policy 0, policy_version 279692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:13,975][24592] Fps is (10 sec: 41787.1, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2291286016. Throughput: 0: 10336.2. Samples: 322799046. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:13,977][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:14,706][626795] Updated weights for policy 0, policy_version 279702 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:16,638][626795] Updated weights for policy 0, policy_version 279712 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:18,702][626795] Updated weights for policy 0, policy_version 279722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41369.9, 300 sec: 41321.0). Total num frames: 2291499008. Throughput: 0: 10332.7. Samples: 322860738. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:18,976][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:20,631][626795] Updated weights for policy 0, policy_version 279732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:22,617][626795] Updated weights for policy 0, policy_version 279742 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.1, 300 sec: 41293.3). Total num frames: 2291695616. Throughput: 0: 10332.5. Samples: 322923120. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:23,977][24592] Avg episode reward: [(0, '4.970')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:24,584][626795] Updated weights for policy 0, policy_version 279752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:26,580][626795] Updated weights for policy 0, policy_version 279762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:28,590][626795] Updated weights for policy 0, policy_version 279772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:28,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41369.5, 300 sec: 41265.5). Total num frames: 2291900416. Throughput: 0: 10318.0. Samples: 322953456. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:28,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:30,682][626795] Updated weights for policy 0, policy_version 279782 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:32,624][626795] Updated weights for policy 0, policy_version 279792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41293.3). Total num frames: 2292113408. Throughput: 0: 10286.8. Samples: 323014932. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:33,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:34,556][626795] Updated weights for policy 0, policy_version 279802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:36,592][626795] Updated weights for policy 0, policy_version 279812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:38,542][626795] Updated weights for policy 0, policy_version 279822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:38,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41369.4, 300 sec: 41293.2). Total num frames: 2292318208. Throughput: 0: 10313.0. Samples: 323077548. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:38,978][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:40,509][626795] Updated weights for policy 0, policy_version 279832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:42,476][626795] Updated weights for policy 0, policy_version 279842 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:43,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41233.2, 300 sec: 41293.2). Total num frames: 2292523008. Throughput: 0: 10336.1. Samples: 323108730. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:43,977][24592] Avg episode reward: [(0, '4.913')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:44,447][626795] Updated weights for policy 0, policy_version 279852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:46,374][626795] Updated weights for policy 0, policy_version 279862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:48,389][626795] Updated weights for policy 0, policy_version 279872 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:48,978][24592] Fps is (10 sec: 41768.8, 60 sec: 41371.7, 300 sec: 41320.6). Total num frames: 2292736000. Throughput: 0: 10346.3. Samples: 323171322. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:48,979][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:50,479][626795] Updated weights for policy 0, policy_version 279882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:52,328][626795] Updated weights for policy 0, policy_version 279892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:53,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41369.9, 300 sec: 41321.0). Total num frames: 2292940800. Throughput: 0: 10349.1. Samples: 323233566. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:53,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:54,372][626795] Updated weights for policy 0, policy_version 279902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:56,244][626795] Updated weights for policy 0, policy_version 279912 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:46:58,233][626795] Updated weights for policy 0, policy_version 279922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:58,976][24592] Fps is (10 sec: 40970.6, 60 sec: 41369.4, 300 sec: 41293.2). Total num frames: 2293145600. Throughput: 0: 10340.8. Samples: 323264382. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:46:58,978][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:00,423][626795] Updated weights for policy 0, policy_version 279932 (0.0037)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:02,282][626795] Updated weights for policy 0, policy_version 279942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:03,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41370.9, 300 sec: 41293.2). Total num frames: 2293350400. Throughput: 0: 10321.2. Samples: 323325192. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:03,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000279950_2293350400.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:04,114][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000278739_2283429888.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:04,421][626795] Updated weights for policy 0, policy_version 279952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:06,288][626795] Updated weights for policy 0, policy_version 279962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:08,316][626795] Updated weights for policy 0, policy_version 279972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:08,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41233.0, 300 sec: 41293.3). Total num frames: 2293555200. Throughput: 0: 10299.3. Samples: 323386590. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:08,976][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:10,376][626795] Updated weights for policy 0, policy_version 279982 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:12,369][626795] Updated weights for policy 0, policy_version 279992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:13,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2293768192. Throughput: 0: 10326.0. Samples: 323418126. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:13,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:14,309][626795] Updated weights for policy 0, policy_version 280002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:16,225][626795] Updated weights for policy 0, policy_version 280012 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:18,120][626795] Updated weights for policy 0, policy_version 280022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:18,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2293972992. Throughput: 0: 10351.9. Samples: 323480766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:18,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:20,158][626795] Updated weights for policy 0, policy_version 280032 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:22,144][626795] Updated weights for policy 0, policy_version 280042 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:23,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2294177792. Throughput: 0: 10349.4. Samples: 323543268. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:23,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:24,057][626795] Updated weights for policy 0, policy_version 280052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:26,110][626795] Updated weights for policy 0, policy_version 280062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:27,965][626795] Updated weights for policy 0, policy_version 280072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:28,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41506.1, 300 sec: 41321.0). Total num frames: 2294390784. Throughput: 0: 10330.0. Samples: 323573580. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:28,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:30,136][626795] Updated weights for policy 0, policy_version 280082 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:32,052][626795] Updated weights for policy 0, policy_version 280092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2294587392. Throughput: 0: 10306.6. Samples: 323635092. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:33,976][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:33,978][626795] Updated weights for policy 0, policy_version 280102 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:36,099][626795] Updated weights for policy 0, policy_version 280112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:38,088][626795] Updated weights for policy 0, policy_version 280122 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:38,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41233.3, 300 sec: 41293.2). Total num frames: 2294792192. Throughput: 0: 10302.1. Samples: 323697156. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:38,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:40,027][626795] Updated weights for policy 0, policy_version 280132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:41,999][626795] Updated weights for policy 0, policy_version 280142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:43,956][626795] Updated weights for policy 0, policy_version 280152 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41321.1). Total num frames: 2295005184. Throughput: 0: 10291.1. Samples: 323727480. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:43,976][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:46,012][626795] Updated weights for policy 0, policy_version 280162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:47,855][626795] Updated weights for policy 0, policy_version 280172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41235.0, 300 sec: 41321.1). Total num frames: 2295209984. Throughput: 0: 10341.5. Samples: 323790558. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:48,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:49,875][626795] Updated weights for policy 0, policy_version 280182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:51,824][626795] Updated weights for policy 0, policy_version 280192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:53,785][626795] Updated weights for policy 0, policy_version 280202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.8, 300 sec: 41348.8). Total num frames: 2295422976. Throughput: 0: 10374.9. Samples: 323853462. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:53,976][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:55,853][626795] Updated weights for policy 0, policy_version 280212 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:57,813][626795] Updated weights for policy 0, policy_version 280222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:58,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.8, 300 sec: 41348.8). Total num frames: 2295627776. Throughput: 0: 10341.1. Samples: 323883474. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:47:58,976][24592] Avg episode reward: [(0, '4.927')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:47:59,760][626795] Updated weights for policy 0, policy_version 280232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:01,687][626795] Updated weights for policy 0, policy_version 280242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:03,724][626795] Updated weights for policy 0, policy_version 280252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2295832576. Throughput: 0: 10328.0. Samples: 323945526. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:03,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:05,791][626795] Updated weights for policy 0, policy_version 280262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:07,763][626795] Updated weights for policy 0, policy_version 280272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:08,975][24592] Fps is (10 sec: 40140.5, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2296029184. Throughput: 0: 10297.7. Samples: 324006666. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:08,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:09,868][626795] Updated weights for policy 0, policy_version 280282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:11,731][626795] Updated weights for policy 0, policy_version 280292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:13,779][626795] Updated weights for policy 0, policy_version 280302 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:13,976][24592] Fps is (10 sec: 40139.1, 60 sec: 41096.2, 300 sec: 41321.0). Total num frames: 2296233984. Throughput: 0: 10300.7. Samples: 324037116. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:13,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:15,807][626795] Updated weights for policy 0, policy_version 280312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:17,707][626795] Updated weights for policy 0, policy_version 280322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2296446976. Throughput: 0: 10316.8. Samples: 324099348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:18,978][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:19,794][626795] Updated weights for policy 0, policy_version 280332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:21,794][626795] Updated weights for policy 0, policy_version 280342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:23,691][626795] Updated weights for policy 0, policy_version 280352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:23,975][24592] Fps is (10 sec: 40961.7, 60 sec: 41096.6, 300 sec: 41293.2). Total num frames: 2296643584. Throughput: 0: 10304.7. Samples: 324160866. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:23,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:25,783][626795] Updated weights for policy 0, policy_version 280362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:27,663][626795] Updated weights for policy 0, policy_version 280372 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.6, 300 sec: 41321.0). Total num frames: 2296856576. Throughput: 0: 10311.2. Samples: 324191484. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:28,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:29,774][626795] Updated weights for policy 0, policy_version 280382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:31,774][626795] Updated weights for policy 0, policy_version 280392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:33,646][626795] Updated weights for policy 0, policy_version 280402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:33,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2297061376. Throughput: 0: 10294.4. Samples: 324253806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:33,979][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:35,681][626795] Updated weights for policy 0, policy_version 280412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:37,751][626795] Updated weights for policy 0, policy_version 280422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:38,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41233.1, 300 sec: 41293.4). Total num frames: 2297266176. Throughput: 0: 10251.3. Samples: 324314772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:38,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:39,742][626795] Updated weights for policy 0, policy_version 280432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:41,691][626795] Updated weights for policy 0, policy_version 280442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:43,827][626795] Updated weights for policy 0, policy_version 280452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:43,976][24592] Fps is (10 sec: 40957.0, 60 sec: 41095.9, 300 sec: 41293.1). Total num frames: 2297470976. Throughput: 0: 10261.5. Samples: 324345252. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:43,978][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:45,767][626795] Updated weights for policy 0, policy_version 280462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:47,650][626795] Updated weights for policy 0, policy_version 280472 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:48,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.5, 300 sec: 41293.2). Total num frames: 2297675776. Throughput: 0: 10250.8. Samples: 324406812. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:48,977][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:49,703][626795] Updated weights for policy 0, policy_version 280482 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:51,698][626795] Updated weights for policy 0, policy_version 280492 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:53,648][626795] Updated weights for policy 0, policy_version 280502 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:53,975][24592] Fps is (10 sec: 40963.7, 60 sec: 40960.0, 300 sec: 41293.3). Total num frames: 2297880576. Throughput: 0: 10263.7. Samples: 324468534. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:53,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:55,717][626795] Updated weights for policy 0, policy_version 280512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:57,723][626795] Updated weights for policy 0, policy_version 280522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:58,976][24592] Fps is (10 sec: 41775.0, 60 sec: 41095.8, 300 sec: 41293.1). Total num frames: 2298093568. Throughput: 0: 10259.5. Samples: 324498798. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:48:58,978][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:48:59,615][626795] Updated weights for policy 0, policy_version 280532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:01,625][626795] Updated weights for policy 0, policy_version 280542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:03,593][626795] Updated weights for policy 0, policy_version 280552 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.6, 300 sec: 41293.4). Total num frames: 2298298368. Throughput: 0: 10264.9. Samples: 324561270. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:03,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000280554_2298298368.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:04,104][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000279345_2288394240.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:05,662][626795] Updated weights for policy 0, policy_version 280562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:07,661][626795] Updated weights for policy 0, policy_version 280572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:08,975][24592] Fps is (10 sec: 40144.8, 60 sec: 41096.6, 300 sec: 41265.5). Total num frames: 2298494976. Throughput: 0: 10249.2. Samples: 324622080. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:08,977][24592] Avg episode reward: [(0, '4.894')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:09,726][626795] Updated weights for policy 0, policy_version 280582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:11,748][626795] Updated weights for policy 0, policy_version 280592 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:13,629][626795] Updated weights for policy 0, policy_version 280602 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:13,975][24592] Fps is (10 sec: 39321.4, 60 sec: 40960.3, 300 sec: 41237.7). Total num frames: 2298691584. Throughput: 0: 10238.3. Samples: 324652206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:13,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:15,765][626795] Updated weights for policy 0, policy_version 280612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:17,725][626795] Updated weights for policy 0, policy_version 280622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:18,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40960.0, 300 sec: 41266.3). Total num frames: 2298904576. Throughput: 0: 10229.8. Samples: 324714144. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:18,976][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:19,695][626795] Updated weights for policy 0, policy_version 280632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:21,677][626795] Updated weights for policy 0, policy_version 280642 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:23,555][626795] Updated weights for policy 0, policy_version 280652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:23,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41096.4, 300 sec: 41237.7). Total num frames: 2299109376. Throughput: 0: 10249.2. Samples: 324775986. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:23,976][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:25,661][626795] Updated weights for policy 0, policy_version 280662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:27,661][626795] Updated weights for policy 0, policy_version 280672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.5, 300 sec: 41237.8). Total num frames: 2299322368. Throughput: 0: 10249.5. Samples: 324806472. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:28,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:29,691][626795] Updated weights for policy 0, policy_version 280682 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:31,625][626795] Updated weights for policy 0, policy_version 280692 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:33,517][626795] Updated weights for policy 0, policy_version 280702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:33,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40960.1, 300 sec: 41210.0). Total num frames: 2299518976. Throughput: 0: 10256.7. Samples: 324868362. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:33,978][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:35,589][626795] Updated weights for policy 0, policy_version 280712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:37,557][626795] Updated weights for policy 0, policy_version 280722 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:38,976][24592] Fps is (10 sec: 40956.9, 60 sec: 41096.0, 300 sec: 41209.8). Total num frames: 2299731968. Throughput: 0: 10281.8. Samples: 324931224. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:38,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:39,625][626795] Updated weights for policy 0, policy_version 280732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:41,560][626795] Updated weights for policy 0, policy_version 280742 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:43,596][626795] Updated weights for policy 0, policy_version 280752 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41097.2, 300 sec: 41209.9). Total num frames: 2299936768. Throughput: 0: 10286.4. Samples: 324961674. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:43,977][24592] Avg episode reward: [(0, '5.040')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:45,576][626795] Updated weights for policy 0, policy_version 280762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:47,576][626795] Updated weights for policy 0, policy_version 280772 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:48,975][24592] Fps is (10 sec: 40144.0, 60 sec: 40960.0, 300 sec: 41210.1). Total num frames: 2300133376. Throughput: 0: 10254.9. Samples: 325022742. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:48,978][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:49,565][626795] Updated weights for policy 0, policy_version 280782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:51,588][626795] Updated weights for policy 0, policy_version 280792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:53,438][626795] Updated weights for policy 0, policy_version 280802 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:53,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41096.5, 300 sec: 41209.9). Total num frames: 2300346368. Throughput: 0: 10288.0. Samples: 325085040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:53,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:55,529][626795] Updated weights for policy 0, policy_version 280812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:57,648][626795] Updated weights for policy 0, policy_version 280822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:58,976][24592] Fps is (10 sec: 41776.9, 60 sec: 40960.3, 300 sec: 41209.9). Total num frames: 2300551168. Throughput: 0: 10291.4. Samples: 325115322. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:49:58,978][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:49:59,423][626795] Updated weights for policy 0, policy_version 280832 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:01,459][626795] Updated weights for policy 0, policy_version 280842 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:03,404][626795] Updated weights for policy 0, policy_version 280852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.0, 300 sec: 41209.9). Total num frames: 2300755968. Throughput: 0: 10302.1. Samples: 325177740. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:03,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:05,415][626795] Updated weights for policy 0, policy_version 280862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:07,241][626795] Updated weights for policy 0, policy_version 280872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:08,975][24592] Fps is (10 sec: 41781.4, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2300968960. Throughput: 0: 10333.2. Samples: 325240980. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:08,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:09,311][626795] Updated weights for policy 0, policy_version 280882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:11,239][626795] Updated weights for policy 0, policy_version 280892 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:12,643][626772] Signal inference workers to stop experience collection... (4250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:12,645][626772] Signal inference workers to resume experience collection... (4250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:12,657][626795] InferenceWorker_p0-w0: stopping experience collection (4250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:12,662][626795] InferenceWorker_p0-w0: resuming experience collection (4250 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:13,396][626795] Updated weights for policy 0, policy_version 280902 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:13,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41369.6, 300 sec: 41210.0). Total num frames: 2301173760. Throughput: 0: 10319.8. Samples: 325270866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:13,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:15,382][626795] Updated weights for policy 0, policy_version 280912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:17,468][626795] Updated weights for policy 0, policy_version 280922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:18,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41096.5, 300 sec: 41182.2). Total num frames: 2301370368. Throughput: 0: 10298.9. Samples: 325331814. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:18,976][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:19,378][626795] Updated weights for policy 0, policy_version 280932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:21,444][626795] Updated weights for policy 0, policy_version 280942 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:23,226][626795] Updated weights for policy 0, policy_version 280952 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:23,977][24592] Fps is (10 sec: 40955.3, 60 sec: 41232.3, 300 sec: 41237.5). Total num frames: 2301583360. Throughput: 0: 10283.0. Samples: 325393962. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:23,979][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:25,316][626795] Updated weights for policy 0, policy_version 280962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:27,292][626795] Updated weights for policy 0, policy_version 280972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:28,976][24592] Fps is (10 sec: 42597.2, 60 sec: 41232.8, 300 sec: 41237.7). Total num frames: 2301796352. Throughput: 0: 10296.7. Samples: 325425030. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:28,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:29,147][626795] Updated weights for policy 0, policy_version 280982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:31,157][626795] Updated weights for policy 0, policy_version 280992 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:33,059][626795] Updated weights for policy 0, policy_version 281002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:33,976][24592] Fps is (10 sec: 41781.9, 60 sec: 41369.2, 300 sec: 41237.6). Total num frames: 2302001152. Throughput: 0: 10333.7. Samples: 325487766. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:33,978][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:35,174][626795] Updated weights for policy 0, policy_version 281012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:37,109][626795] Updated weights for policy 0, policy_version 281022 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:38,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41233.6, 300 sec: 41210.0). Total num frames: 2302205952. Throughput: 0: 10332.3. Samples: 325549992. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:38,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:38,998][626795] Updated weights for policy 0, policy_version 281032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:41,136][626795] Updated weights for policy 0, policy_version 281042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:42,997][626795] Updated weights for policy 0, policy_version 281052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:43,976][24592] Fps is (10 sec: 40961.1, 60 sec: 41232.8, 300 sec: 41210.7). Total num frames: 2302410752. Throughput: 0: 10349.8. Samples: 325581060. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:43,978][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:45,114][626795] Updated weights for policy 0, policy_version 281062 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:47,197][626795] Updated weights for policy 0, policy_version 281072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:48,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41369.3, 300 sec: 41210.0). Total num frames: 2302615552. Throughput: 0: 10298.4. Samples: 325641174. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:48,979][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:49,238][626795] Updated weights for policy 0, policy_version 281082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:51,245][626795] Updated weights for policy 0, policy_version 281092 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:53,283][626795] Updated weights for policy 0, policy_version 281102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:53,975][24592] Fps is (10 sec: 40142.0, 60 sec: 41096.5, 300 sec: 41182.1). Total num frames: 2302812160. Throughput: 0: 10240.4. Samples: 325701798. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:53,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:55,208][626795] Updated weights for policy 0, policy_version 281112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:57,171][626795] Updated weights for policy 0, policy_version 281122 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:58,976][24592] Fps is (10 sec: 40141.3, 60 sec: 41096.7, 300 sec: 41182.4). Total num frames: 2303016960. Throughput: 0: 10255.7. Samples: 325732374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:50:58,976][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:50:59,287][626795] Updated weights for policy 0, policy_version 281132 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:01,195][626795] Updated weights for policy 0, policy_version 281142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:03,137][626795] Updated weights for policy 0, policy_version 281152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41233.1, 300 sec: 41182.2). Total num frames: 2303229952. Throughput: 0: 10286.9. Samples: 325794726. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:03,976][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000281156_2303229952.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:04,084][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000279950_2293350400.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:05,208][626795] Updated weights for policy 0, policy_version 281162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:07,125][626795] Updated weights for policy 0, policy_version 281172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:08,975][24592] Fps is (10 sec: 41780.6, 60 sec: 41096.5, 300 sec: 41182.2). Total num frames: 2303434752. Throughput: 0: 10288.4. Samples: 325856928. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:08,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:09,088][626795] Updated weights for policy 0, policy_version 281182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:11,115][626795] Updated weights for policy 0, policy_version 281192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:13,138][626795] Updated weights for policy 0, policy_version 281202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:13,981][24592] Fps is (10 sec: 40936.9, 60 sec: 41092.7, 300 sec: 41153.6). Total num frames: 2303639552. Throughput: 0: 10284.0. Samples: 325887864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:13,983][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:15,097][626795] Updated weights for policy 0, policy_version 281212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:17,007][626795] Updated weights for policy 0, policy_version 281222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:18,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41232.9, 300 sec: 41182.1). Total num frames: 2303844352. Throughput: 0: 10241.3. Samples: 325948620. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:18,978][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:19,094][626795] Updated weights for policy 0, policy_version 281232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:21,187][626795] Updated weights for policy 0, policy_version 281242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:23,130][626795] Updated weights for policy 0, policy_version 281252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:23,975][24592] Fps is (10 sec: 40983.1, 60 sec: 41097.4, 300 sec: 41182.2). Total num frames: 2304049152. Throughput: 0: 10226.5. Samples: 326010186. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:23,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:25,162][626795] Updated weights for policy 0, policy_version 281262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:27,184][626795] Updated weights for policy 0, policy_version 281272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:28,975][24592] Fps is (10 sec: 40961.0, 60 sec: 40960.3, 300 sec: 41154.4). Total num frames: 2304253952. Throughput: 0: 10212.1. Samples: 326040600. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:28,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:29,142][626795] Updated weights for policy 0, policy_version 281282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:31,150][626795] Updated weights for policy 0, policy_version 281292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:33,021][626795] Updated weights for policy 0, policy_version 281302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:33,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40960.3, 300 sec: 41154.4). Total num frames: 2304458752. Throughput: 0: 10251.5. Samples: 326102490. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:33,977][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:35,078][626795] Updated weights for policy 0, policy_version 281312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:37,171][626795] Updated weights for policy 0, policy_version 281322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:38,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.0, 300 sec: 41154.4). Total num frames: 2304663552. Throughput: 0: 10294.1. Samples: 326165034. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:38,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:38,994][626795] Updated weights for policy 0, policy_version 281332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:41,010][626795] Updated weights for policy 0, policy_version 281342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:42,983][626795] Updated weights for policy 0, policy_version 281352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:43,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40960.2, 300 sec: 41127.0). Total num frames: 2304868352. Throughput: 0: 10295.5. Samples: 326195670. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:43,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:45,030][626795] Updated weights for policy 0, policy_version 281362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:46,955][626795] Updated weights for policy 0, policy_version 281372 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:48,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40960.4, 300 sec: 41126.7). Total num frames: 2305073152. Throughput: 0: 10275.1. Samples: 326257104. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:48,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:49,049][626795] Updated weights for policy 0, policy_version 281382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:51,028][626795] Updated weights for policy 0, policy_version 281392 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:53,042][626795] Updated weights for policy 0, policy_version 281402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:53,976][24592] Fps is (10 sec: 40956.7, 60 sec: 41095.9, 300 sec: 41126.5). Total num frames: 2305277952. Throughput: 0: 10249.9. Samples: 326318184. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:53,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:55,141][626795] Updated weights for policy 0, policy_version 281412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:57,145][626795] Updated weights for policy 0, policy_version 281422 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:58,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41096.7, 300 sec: 41126.6). Total num frames: 2305482752. Throughput: 0: 10235.8. Samples: 326348418. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:51:58,981][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:51:59,030][626795] Updated weights for policy 0, policy_version 281432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:01,025][626795] Updated weights for policy 0, policy_version 281442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:03,140][626795] Updated weights for policy 0, policy_version 281452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:03,975][24592] Fps is (10 sec: 40963.7, 60 sec: 40960.0, 300 sec: 41126.6). Total num frames: 2305687552. Throughput: 0: 10242.5. Samples: 326409528. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:03,977][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:05,208][626795] Updated weights for policy 0, policy_version 281462 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:07,196][626795] Updated weights for policy 0, policy_version 281472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40960.0, 300 sec: 41098.9). Total num frames: 2305892352. Throughput: 0: 10237.9. Samples: 326470890. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:08,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:09,196][626795] Updated weights for policy 0, policy_version 281482 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:11,150][626795] Updated weights for policy 0, policy_version 281492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:13,164][626795] Updated weights for policy 0, policy_version 281502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40963.9, 300 sec: 41098.8). Total num frames: 2306097152. Throughput: 0: 10240.5. Samples: 326501424. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:13,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:15,128][626795] Updated weights for policy 0, policy_version 281512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:17,049][626795] Updated weights for policy 0, policy_version 281522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:18,964][626795] Updated weights for policy 0, policy_version 281532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:18,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41096.4, 300 sec: 41126.6). Total num frames: 2306310144. Throughput: 0: 10245.8. Samples: 326563554. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:18,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:21,100][626795] Updated weights for policy 0, policy_version 281542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:23,109][626795] Updated weights for policy 0, policy_version 281552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:23,976][24592] Fps is (10 sec: 40957.1, 60 sec: 40959.5, 300 sec: 41071.0). Total num frames: 2306506752. Throughput: 0: 10213.6. Samples: 326624652. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:23,979][24592] Avg episode reward: [(0, '4.987')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:25,201][626795] Updated weights for policy 0, policy_version 281562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:27,246][626795] Updated weights for policy 0, policy_version 281572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:28,975][24592] Fps is (10 sec: 40142.5, 60 sec: 40960.0, 300 sec: 41098.8). Total num frames: 2306711552. Throughput: 0: 10208.2. Samples: 326655036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:28,976][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:29,101][626795] Updated weights for policy 0, policy_version 281582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:31,200][626795] Updated weights for policy 0, policy_version 281592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:33,100][626795] Updated weights for policy 0, policy_version 281602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:33,975][24592] Fps is (10 sec: 40962.9, 60 sec: 40960.1, 300 sec: 41098.9). Total num frames: 2306916352. Throughput: 0: 10214.0. Samples: 326716734. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:33,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:35,157][626795] Updated weights for policy 0, policy_version 281612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:37,157][626795] Updated weights for policy 0, policy_version 281622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:38,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40959.9, 300 sec: 41071.0). Total num frames: 2307121152. Throughput: 0: 10236.3. Samples: 326778810. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:38,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:39,076][626795] Updated weights for policy 0, policy_version 281632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:41,043][626795] Updated weights for policy 0, policy_version 281642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:42,933][626795] Updated weights for policy 0, policy_version 281652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:43,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40960.1, 300 sec: 41071.1). Total num frames: 2307325952. Throughput: 0: 10252.4. Samples: 326809776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:43,976][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:45,022][626795] Updated weights for policy 0, policy_version 281662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:46,989][626795] Updated weights for policy 0, policy_version 281672 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:48,948][626795] Updated weights for policy 0, policy_version 281682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:48,980][24592] Fps is (10 sec: 41762.0, 60 sec: 41093.5, 300 sec: 41070.5). Total num frames: 2307538944. Throughput: 0: 10274.8. Samples: 326871936. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:48,981][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:50,946][626795] Updated weights for policy 0, policy_version 281692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:52,962][626795] Updated weights for policy 0, policy_version 281702 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:53,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41096.8, 300 sec: 41071.0). Total num frames: 2307743744. Throughput: 0: 10273.9. Samples: 326933220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:53,980][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:55,020][626795] Updated weights for policy 0, policy_version 281712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:57,119][626795] Updated weights for policy 0, policy_version 281722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:58,975][24592] Fps is (10 sec: 40158.2, 60 sec: 40960.0, 300 sec: 41043.3). Total num frames: 2307940352. Throughput: 0: 10264.5. Samples: 326963328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:52:58,977][24592] Avg episode reward: [(0, '5.021')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:52:59,012][626795] Updated weights for policy 0, policy_version 281732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:01,046][626795] Updated weights for policy 0, policy_version 281742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:03,016][626795] Updated weights for policy 0, policy_version 281752 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:03,975][24592] Fps is (10 sec: 40142.6, 60 sec: 40960.0, 300 sec: 41071.1). Total num frames: 2308145152. Throughput: 0: 10252.6. Samples: 327024918. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:03,977][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000281756_2308145152.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:04,105][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000280554_2298298368.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:05,129][626795] Updated weights for policy 0, policy_version 281762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:07,058][626795] Updated weights for policy 0, policy_version 281772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.5, 300 sec: 41098.9). Total num frames: 2308358144. Throughput: 0: 10262.3. Samples: 327086448. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:08,976][626795] Updated weights for policy 0, policy_version 281782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:08,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:10,982][626795] Updated weights for policy 0, policy_version 281792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:13,066][626795] Updated weights for policy 0, policy_version 281802 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41096.5, 300 sec: 41071.1). Total num frames: 2308562944. Throughput: 0: 10269.2. Samples: 327117150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:13,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:14,963][626795] Updated weights for policy 0, policy_version 281812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:16,893][626795] Updated weights for policy 0, policy_version 281822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:18,901][626795] Updated weights for policy 0, policy_version 281832 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:18,976][24592] Fps is (10 sec: 40958.2, 60 sec: 40960.0, 300 sec: 41098.8). Total num frames: 2308767744. Throughput: 0: 10285.4. Samples: 327179580. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:18,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:20,829][626795] Updated weights for policy 0, policy_version 281842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:22,870][626795] Updated weights for policy 0, policy_version 281852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41097.0, 300 sec: 41071.1). Total num frames: 2308972544. Throughput: 0: 10299.9. Samples: 327242304. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:23,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:24,878][626795] Updated weights for policy 0, policy_version 281862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:26,857][626795] Updated weights for policy 0, policy_version 281872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:28,771][626795] Updated weights for policy 0, policy_version 281882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:28,976][24592] Fps is (10 sec: 40958.9, 60 sec: 41096.1, 300 sec: 41071.0). Total num frames: 2309177344. Throughput: 0: 10281.4. Samples: 327272448. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:28,977][24592] Avg episode reward: [(0, '4.893')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:30,889][626795] Updated weights for policy 0, policy_version 281892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:32,761][626795] Updated weights for policy 0, policy_version 281902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41096.5, 300 sec: 41071.1). Total num frames: 2309382144. Throughput: 0: 10278.8. Samples: 327334440. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:33,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:34,825][626795] Updated weights for policy 0, policy_version 281912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:36,832][626795] Updated weights for policy 0, policy_version 281922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:38,856][626795] Updated weights for policy 0, policy_version 281932 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:38,975][24592] Fps is (10 sec: 40962.7, 60 sec: 41096.7, 300 sec: 41071.2). Total num frames: 2309586944. Throughput: 0: 10285.4. Samples: 327396060. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:38,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:40,739][626795] Updated weights for policy 0, policy_version 281942 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:42,750][626795] Updated weights for policy 0, policy_version 281952 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:43,976][24592] Fps is (10 sec: 41776.5, 60 sec: 41232.6, 300 sec: 41098.7). Total num frames: 2309799936. Throughput: 0: 10301.2. Samples: 327426888. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:43,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:44,784][626795] Updated weights for policy 0, policy_version 281962 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:46,658][626795] Updated weights for policy 0, policy_version 281972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:48,608][626795] Updated weights for policy 0, policy_version 281982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41099.5, 300 sec: 41098.9). Total num frames: 2310004736. Throughput: 0: 10334.8. Samples: 327489984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:48,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:50,638][626795] Updated weights for policy 0, policy_version 281992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:52,601][626795] Updated weights for policy 0, policy_version 282002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:53,976][24592] Fps is (10 sec: 41781.5, 60 sec: 41233.3, 300 sec: 41099.0). Total num frames: 2310217728. Throughput: 0: 10338.8. Samples: 327551694. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:53,979][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:54,647][626795] Updated weights for policy 0, policy_version 282012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:56,665][626795] Updated weights for policy 0, policy_version 282022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:53:58,532][626795] Updated weights for policy 0, policy_version 282032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:58,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41232.9, 300 sec: 41071.0). Total num frames: 2310414336. Throughput: 0: 10342.9. Samples: 327582582. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:53:58,978][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:00,693][626795] Updated weights for policy 0, policy_version 282042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:02,648][626795] Updated weights for policy 0, policy_version 282052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:03,975][24592] Fps is (10 sec: 40141.2, 60 sec: 41233.1, 300 sec: 41098.8). Total num frames: 2310619136. Throughput: 0: 10307.3. Samples: 327643404. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:03,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:04,754][626795] Updated weights for policy 0, policy_version 282062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:06,631][626795] Updated weights for policy 0, policy_version 282072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:08,605][626795] Updated weights for policy 0, policy_version 282082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:08,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41096.5, 300 sec: 41126.6). Total num frames: 2310823936. Throughput: 0: 10282.9. Samples: 327705036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:08,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:10,621][626795] Updated weights for policy 0, policy_version 282092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:12,540][626795] Updated weights for policy 0, policy_version 282102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:13,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41232.9, 300 sec: 41126.6). Total num frames: 2311036928. Throughput: 0: 10303.3. Samples: 327736092. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:13,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:14,588][626795] Updated weights for policy 0, policy_version 282112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:16,505][626795] Updated weights for policy 0, policy_version 282122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:18,493][626795] Updated weights for policy 0, policy_version 282132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.3, 300 sec: 41126.6). Total num frames: 2311241728. Throughput: 0: 10322.3. Samples: 327798942. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:18,979][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:20,438][626795] Updated weights for policy 0, policy_version 282142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:22,412][626795] Updated weights for policy 0, policy_version 282152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:23,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41369.6, 300 sec: 41126.6). Total num frames: 2311454720. Throughput: 0: 10323.6. Samples: 327860622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:23,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:24,507][626795] Updated weights for policy 0, policy_version 282162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:26,379][626795] Updated weights for policy 0, policy_version 282172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:28,320][626795] Updated weights for policy 0, policy_version 282182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41233.5, 300 sec: 41126.6). Total num frames: 2311651328. Throughput: 0: 10336.9. Samples: 327892044. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:28,976][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:30,423][626795] Updated weights for policy 0, policy_version 282192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:32,466][626795] Updated weights for policy 0, policy_version 282202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:33,975][24592] Fps is (10 sec: 40140.3, 60 sec: 41233.0, 300 sec: 41098.9). Total num frames: 2311856128. Throughput: 0: 10282.8. Samples: 327952710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:33,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:34,453][626795] Updated weights for policy 0, policy_version 282212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:36,511][626795] Updated weights for policy 0, policy_version 282222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:38,426][626795] Updated weights for policy 0, policy_version 282232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:38,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41233.0, 300 sec: 41098.8). Total num frames: 2312060928. Throughput: 0: 10277.4. Samples: 328014174. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:38,977][24592] Avg episode reward: [(0, '4.347')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:40,473][626795] Updated weights for policy 0, policy_version 282242 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:42,475][626795] Updated weights for policy 0, policy_version 282252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:43,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41097.0, 300 sec: 41126.6). Total num frames: 2312265728. Throughput: 0: 10266.7. Samples: 328044582. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:43,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:44,435][626795] Updated weights for policy 0, policy_version 282262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:46,422][626795] Updated weights for policy 0, policy_version 282272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:48,460][626795] Updated weights for policy 0, policy_version 282282 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:48,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.5, 300 sec: 41098.8). Total num frames: 2312470528. Throughput: 0: 10286.0. Samples: 328106274. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:48,978][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:50,442][626795] Updated weights for policy 0, policy_version 282292 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:52,439][626795] Updated weights for policy 0, policy_version 282302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:53,976][24592] Fps is (10 sec: 40958.5, 60 sec: 40959.9, 300 sec: 41098.9). Total num frames: 2312675328. Throughput: 0: 10283.9. Samples: 328167816. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:53,980][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:54,356][626795] Updated weights for policy 0, policy_version 282312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:56,380][626795] Updated weights for policy 0, policy_version 282322 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:54:58,290][626795] Updated weights for policy 0, policy_version 282332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:58,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.2, 300 sec: 41126.6). Total num frames: 2312888320. Throughput: 0: 10278.9. Samples: 328198638. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:54:58,977][24592] Avg episode reward: [(0, '4.996')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:00,458][626795] Updated weights for policy 0, policy_version 282342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:02,369][626795] Updated weights for policy 0, policy_version 282352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:03,976][24592] Fps is (10 sec: 41779.6, 60 sec: 41232.9, 300 sec: 41098.8). Total num frames: 2313093120. Throughput: 0: 10242.2. Samples: 328259844. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:03,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000282360_2313093120.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:04,092][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000281156_2303229952.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:04,478][626795] Updated weights for policy 0, policy_version 282362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:06,503][626795] Updated weights for policy 0, policy_version 282372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:08,486][626795] Updated weights for policy 0, policy_version 282382 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:08,978][24592] Fps is (10 sec: 40140.7, 60 sec: 41096.5, 300 sec: 41071.1). Total num frames: 2313289728. Throughput: 0: 10234.9. Samples: 328321194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:08,982][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:10,569][626795] Updated weights for policy 0, policy_version 282392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:12,445][626795] Updated weights for policy 0, policy_version 282402 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:13,976][24592] Fps is (10 sec: 40139.9, 60 sec: 40959.8, 300 sec: 41098.8). Total num frames: 2313494528. Throughput: 0: 10207.1. Samples: 328351368. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:13,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:14,566][626795] Updated weights for policy 0, policy_version 282412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:16,583][626795] Updated weights for policy 0, policy_version 282422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:18,550][626795] Updated weights for policy 0, policy_version 282432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:18,975][24592] Fps is (10 sec: 40141.0, 60 sec: 40823.5, 300 sec: 41043.5). Total num frames: 2313691136. Throughput: 0: 10200.8. Samples: 328411746. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:18,976][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:20,565][626795] Updated weights for policy 0, policy_version 282442 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:22,550][626795] Updated weights for policy 0, policy_version 282452 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:23,975][24592] Fps is (10 sec: 40961.9, 60 sec: 40823.4, 300 sec: 41043.4). Total num frames: 2313904128. Throughput: 0: 10213.3. Samples: 328473774. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:23,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:24,646][626795] Updated weights for policy 0, policy_version 282462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:26,465][626795] Updated weights for policy 0, policy_version 282472 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:28,540][626795] Updated weights for policy 0, policy_version 282482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:28,975][24592] Fps is (10 sec: 41778.6, 60 sec: 40960.0, 300 sec: 41043.4). Total num frames: 2314108928. Throughput: 0: 10226.0. Samples: 328504752. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:28,977][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:30,553][626795] Updated weights for policy 0, policy_version 282492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:32,443][626795] Updated weights for policy 0, policy_version 282502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:33,976][24592] Fps is (10 sec: 40958.5, 60 sec: 40959.8, 300 sec: 41043.3). Total num frames: 2314313728. Throughput: 0: 10227.4. Samples: 328566510. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:33,978][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:34,618][626795] Updated weights for policy 0, policy_version 282512 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:36,574][626795] Updated weights for policy 0, policy_version 282522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:38,521][626795] Updated weights for policy 0, policy_version 282532 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:38,976][24592] Fps is (10 sec: 40959.4, 60 sec: 40959.8, 300 sec: 41043.3). Total num frames: 2314518528. Throughput: 0: 10220.6. Samples: 328627740. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:38,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:40,543][626795] Updated weights for policy 0, policy_version 282542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:42,553][626795] Updated weights for policy 0, policy_version 282552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:43,975][24592] Fps is (10 sec: 40142.3, 60 sec: 40823.4, 300 sec: 41015.6). Total num frames: 2314715136. Throughput: 0: 10201.7. Samples: 328657716. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:43,979][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:44,692][626795] Updated weights for policy 0, policy_version 282562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:46,575][626795] Updated weights for policy 0, policy_version 282572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:48,609][626795] Updated weights for policy 0, policy_version 282582 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:48,975][24592] Fps is (10 sec: 40141.9, 60 sec: 40823.5, 300 sec: 41043.3). Total num frames: 2314919936. Throughput: 0: 10191.9. Samples: 328718478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:48,976][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:50,709][626795] Updated weights for policy 0, policy_version 282592 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:52,724][626795] Updated weights for policy 0, policy_version 282602 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:53,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40823.7, 300 sec: 41043.3). Total num frames: 2315124736. Throughput: 0: 10190.7. Samples: 328779774. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:53,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:54,686][626795] Updated weights for policy 0, policy_version 282612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:56,652][626795] Updated weights for policy 0, policy_version 282622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:55:58,560][626795] Updated weights for policy 0, policy_version 282632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:58,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40686.9, 300 sec: 41015.5). Total num frames: 2315329536. Throughput: 0: 10202.6. Samples: 328810482. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:55:58,978][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:00,632][626795] Updated weights for policy 0, policy_version 282642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:02,511][626795] Updated weights for policy 0, policy_version 282652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:03,980][24592] Fps is (10 sec: 41759.2, 60 sec: 40820.4, 300 sec: 41042.6). Total num frames: 2315542528. Throughput: 0: 10245.4. Samples: 328872840. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:03,981][24592] Avg episode reward: [(0, '4.324')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:04,569][626795] Updated weights for policy 0, policy_version 282662 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:06,667][626795] Updated weights for policy 0, policy_version 282672 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:08,600][626795] Updated weights for policy 0, policy_version 282682 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 40959.9, 300 sec: 41044.1). Total num frames: 2315747328. Throughput: 0: 10226.9. Samples: 328933986. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:08,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:10,676][626795] Updated weights for policy 0, policy_version 282692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:12,738][626795] Updated weights for policy 0, policy_version 282702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:13,976][24592] Fps is (10 sec: 40159.0, 60 sec: 40823.6, 300 sec: 41015.5). Total num frames: 2315943936. Throughput: 0: 10200.2. Samples: 328963764. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:13,978][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:14,679][626795] Updated weights for policy 0, policy_version 282712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:16,699][626795] Updated weights for policy 0, policy_version 282722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:18,624][626795] Updated weights for policy 0, policy_version 282732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:18,978][24592] Fps is (10 sec: 40130.6, 60 sec: 40958.2, 300 sec: 41015.2). Total num frames: 2316148736. Throughput: 0: 10199.4. Samples: 329025504. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:18,980][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:20,747][626795] Updated weights for policy 0, policy_version 282742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:22,652][626795] Updated weights for policy 0, policy_version 282752 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:23,975][24592] Fps is (10 sec: 40961.2, 60 sec: 40823.5, 300 sec: 41015.5). Total num frames: 2316353536. Throughput: 0: 10202.1. Samples: 329086830. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:23,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:24,658][626795] Updated weights for policy 0, policy_version 282762 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:26,588][626795] Updated weights for policy 0, policy_version 282772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:28,661][626795] Updated weights for policy 0, policy_version 282782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:28,975][24592] Fps is (10 sec: 41790.3, 60 sec: 40960.1, 300 sec: 41043.3). Total num frames: 2316566528. Throughput: 0: 10222.5. Samples: 329117730. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:28,977][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:30,603][626795] Updated weights for policy 0, policy_version 282792 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:32,568][626795] Updated weights for policy 0, policy_version 282802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40960.3, 300 sec: 41043.3). Total num frames: 2316771328. Throughput: 0: 10250.4. Samples: 329179746. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:33,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:34,574][626795] Updated weights for policy 0, policy_version 282812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:36,587][626795] Updated weights for policy 0, policy_version 282822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:38,532][626795] Updated weights for policy 0, policy_version 282832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:38,975][24592] Fps is (10 sec: 40140.4, 60 sec: 40823.6, 300 sec: 41015.5). Total num frames: 2316967936. Throughput: 0: 10266.0. Samples: 329241744. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:38,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:40,656][626795] Updated weights for policy 0, policy_version 282842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:42,691][626795] Updated weights for policy 0, policy_version 282852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:43,976][24592] Fps is (10 sec: 40140.1, 60 sec: 40959.9, 300 sec: 41015.5). Total num frames: 2317172736. Throughput: 0: 10242.2. Samples: 329271384. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:43,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:44,642][626795] Updated weights for policy 0, policy_version 282862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:46,708][626795] Updated weights for policy 0, policy_version 282872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:48,670][626795] Updated weights for policy 0, policy_version 282882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:48,976][24592] Fps is (10 sec: 40959.7, 60 sec: 40959.9, 300 sec: 41015.6). Total num frames: 2317377536. Throughput: 0: 10214.7. Samples: 329332452. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:48,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:50,767][626795] Updated weights for policy 0, policy_version 282892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:52,588][626795] Updated weights for policy 0, policy_version 282902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:53,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40960.0, 300 sec: 41015.5). Total num frames: 2317582336. Throughput: 0: 10230.7. Samples: 329394366. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:53,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:54,665][626795] Updated weights for policy 0, policy_version 282912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:56,633][626795] Updated weights for policy 0, policy_version 282922 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:56:58,547][626795] Updated weights for policy 0, policy_version 282932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:58,975][24592] Fps is (10 sec: 40960.9, 60 sec: 40960.1, 300 sec: 41015.5). Total num frames: 2317787136. Throughput: 0: 10240.3. Samples: 329424576. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:56:58,976][24592] Avg episode reward: [(0, '5.034')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:00,700][626795] Updated weights for policy 0, policy_version 282942 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:01,787][626772] Signal inference workers to stop experience collection... (4300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:01,789][626772] Signal inference workers to resume experience collection... (4300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:01,800][626795] InferenceWorker_p0-w0: stopping experience collection (4300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:01,806][626795] InferenceWorker_p0-w0: resuming experience collection (4300 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:02,647][626795] Updated weights for policy 0, policy_version 282952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:03,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40826.7, 300 sec: 41015.5). Total num frames: 2317991936. Throughput: 0: 10240.7. Samples: 329486310. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:03,978][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000282958_2317991936.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:04,042][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000281756_2308145152.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:04,741][626795] Updated weights for policy 0, policy_version 282962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:06,714][626795] Updated weights for policy 0, policy_version 282972 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:08,887][626795] Updated weights for policy 0, policy_version 282982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:08,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40823.6, 300 sec: 41015.5). Total num frames: 2318196736. Throughput: 0: 10233.3. Samples: 329547330. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:08,978][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:10,670][626795] Updated weights for policy 0, policy_version 282992 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:12,804][626795] Updated weights for policy 0, policy_version 283002 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:13,975][24592] Fps is (10 sec: 40140.8, 60 sec: 40823.7, 300 sec: 40960.1). Total num frames: 2318393344. Throughput: 0: 10199.3. Samples: 329576700. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:13,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:14,929][626795] Updated weights for policy 0, policy_version 283012 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:16,792][626795] Updated weights for policy 0, policy_version 283022 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:18,907][626795] Updated weights for policy 0, policy_version 283032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:18,976][24592] Fps is (10 sec: 40139.0, 60 sec: 40825.0, 300 sec: 40987.8). Total num frames: 2318598144. Throughput: 0: 10180.2. Samples: 329637858. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:18,978][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:20,878][626795] Updated weights for policy 0, policy_version 283042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:22,852][626795] Updated weights for policy 0, policy_version 283052 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:23,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40823.4, 300 sec: 40987.8). Total num frames: 2318802944. Throughput: 0: 10169.5. Samples: 329699370. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:23,978][24592] Avg episode reward: [(0, '5.029')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:24,812][626795] Updated weights for policy 0, policy_version 283062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:26,895][626795] Updated weights for policy 0, policy_version 283072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:28,729][626795] Updated weights for policy 0, policy_version 283082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:28,977][24592] Fps is (10 sec: 40955.1, 60 sec: 40685.8, 300 sec: 40987.5). Total num frames: 2319007744. Throughput: 0: 10202.1. Samples: 329730492. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:28,979][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:30,719][626795] Updated weights for policy 0, policy_version 283092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:32,693][626795] Updated weights for policy 0, policy_version 283102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:33,975][24592] Fps is (10 sec: 41779.6, 60 sec: 40823.5, 300 sec: 41015.6). Total num frames: 2319220736. Throughput: 0: 10236.7. Samples: 329793102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:33,977][24592] Avg episode reward: [(0, '4.957')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:34,698][626795] Updated weights for policy 0, policy_version 283112 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:36,716][626795] Updated weights for policy 0, policy_version 283122 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:38,670][626795] Updated weights for policy 0, policy_version 283132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:38,975][24592] Fps is (10 sec: 41785.7, 60 sec: 40960.0, 300 sec: 41015.5). Total num frames: 2319425536. Throughput: 0: 10240.3. Samples: 329855178. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:38,977][24592] Avg episode reward: [(0, '5.027')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:40,660][626795] Updated weights for policy 0, policy_version 283142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:42,598][626795] Updated weights for policy 0, policy_version 283152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:43,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40960.0, 300 sec: 40988.3). Total num frames: 2319630336. Throughput: 0: 10251.7. Samples: 329885904. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:43,978][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:44,777][626795] Updated weights for policy 0, policy_version 283162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:46,677][626795] Updated weights for policy 0, policy_version 283172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:48,745][626795] Updated weights for policy 0, policy_version 283182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:48,976][24592] Fps is (10 sec: 40957.6, 60 sec: 40959.7, 300 sec: 40987.7). Total num frames: 2319835136. Throughput: 0: 10235.2. Samples: 329946900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:48,983][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:50,788][626795] Updated weights for policy 0, policy_version 283192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:52,638][626795] Updated weights for policy 0, policy_version 283202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:53,975][24592] Fps is (10 sec: 40960.7, 60 sec: 40960.0, 300 sec: 41015.5). Total num frames: 2320039936. Throughput: 0: 10240.4. Samples: 330008148. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:53,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:54,706][626795] Updated weights for policy 0, policy_version 283212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:56,779][626795] Updated weights for policy 0, policy_version 283222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:57:58,669][626795] Updated weights for policy 0, policy_version 283232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:58,975][24592] Fps is (10 sec: 40962.5, 60 sec: 40959.9, 300 sec: 41015.5). Total num frames: 2320244736. Throughput: 0: 10267.6. Samples: 330038742. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:57:58,976][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:00,700][626795] Updated weights for policy 0, policy_version 283242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:02,690][626795] Updated weights for policy 0, policy_version 283252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:03,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41096.5, 300 sec: 41015.5). Total num frames: 2320457728. Throughput: 0: 10295.9. Samples: 330101172. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:03,978][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:04,569][626795] Updated weights for policy 0, policy_version 283262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:06,591][626795] Updated weights for policy 0, policy_version 283272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:08,472][626795] Updated weights for policy 0, policy_version 283282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:08,976][24592] Fps is (10 sec: 40959.7, 60 sec: 40959.9, 300 sec: 40987.7). Total num frames: 2320654336. Throughput: 0: 10318.0. Samples: 330163680. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:08,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:10,563][626795] Updated weights for policy 0, policy_version 283292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:12,493][626795] Updated weights for policy 0, policy_version 283302 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:13,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41233.1, 300 sec: 41015.6). Total num frames: 2320867328. Throughput: 0: 10303.7. Samples: 330194142. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:13,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:14,503][626795] Updated weights for policy 0, policy_version 283312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:16,503][626795] Updated weights for policy 0, policy_version 283322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:18,514][626795] Updated weights for policy 0, policy_version 283332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:18,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.3, 300 sec: 41015.5). Total num frames: 2321072128. Throughput: 0: 10272.6. Samples: 330255372. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:18,978][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:20,597][626795] Updated weights for policy 0, policy_version 283342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:22,669][626795] Updated weights for policy 0, policy_version 283352 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:23,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41233.1, 300 sec: 41015.6). Total num frames: 2321276928. Throughput: 0: 10258.3. Samples: 330316800. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:23,976][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:24,552][626795] Updated weights for policy 0, policy_version 283362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:26,532][626795] Updated weights for policy 0, policy_version 283372 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:28,512][626795] Updated weights for policy 0, policy_version 283382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:28,975][24592] Fps is (10 sec: 40141.2, 60 sec: 41097.7, 300 sec: 40987.8). Total num frames: 2321473536. Throughput: 0: 10264.3. Samples: 330347796. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:28,977][24592] Avg episode reward: [(0, '4.948')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:30,522][626795] Updated weights for policy 0, policy_version 283392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:32,502][626795] Updated weights for policy 0, policy_version 283402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.5, 300 sec: 41015.5). Total num frames: 2321686528. Throughput: 0: 10278.7. Samples: 330409434. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:33,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:34,506][626795] Updated weights for policy 0, policy_version 283412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:36,485][626795] Updated weights for policy 0, policy_version 283422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:38,323][626795] Updated weights for policy 0, policy_version 283432 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41233.2, 300 sec: 41015.6). Total num frames: 2321899520. Throughput: 0: 10313.5. Samples: 330472254. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:38,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:40,390][626795] Updated weights for policy 0, policy_version 283442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:42,409][626795] Updated weights for policy 0, policy_version 283452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.2, 300 sec: 41015.5). Total num frames: 2322104320. Throughput: 0: 10317.4. Samples: 330503022. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:43,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:44,419][626795] Updated weights for policy 0, policy_version 283462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:46,259][626795] Updated weights for policy 0, policy_version 283472 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:48,276][626795] Updated weights for policy 0, policy_version 283482 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:48,976][24592] Fps is (10 sec: 40957.8, 60 sec: 41233.2, 300 sec: 40987.7). Total num frames: 2322309120. Throughput: 0: 10286.8. Samples: 330564084. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:48,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:50,448][626795] Updated weights for policy 0, policy_version 283492 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:52,534][626795] Updated weights for policy 0, policy_version 283502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:53,976][24592] Fps is (10 sec: 40137.4, 60 sec: 41095.9, 300 sec: 40987.7). Total num frames: 2322505728. Throughput: 0: 10257.8. Samples: 330625290. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:53,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:54,439][626795] Updated weights for policy 0, policy_version 283512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:56,470][626795] Updated weights for policy 0, policy_version 283522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:58:58,511][626795] Updated weights for policy 0, policy_version 283532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:58,976][24592] Fps is (10 sec: 40142.1, 60 sec: 41096.5, 300 sec: 40987.8). Total num frames: 2322710528. Throughput: 0: 10251.9. Samples: 330655482. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:58:58,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:00,536][626795] Updated weights for policy 0, policy_version 283542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:02,426][626795] Updated weights for policy 0, policy_version 283552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:03,975][24592] Fps is (10 sec: 41782.8, 60 sec: 41096.6, 300 sec: 41015.5). Total num frames: 2322923520. Throughput: 0: 10262.8. Samples: 330717198. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:03,976][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000283560_2322923520.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:04,044][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000282360_2313093120.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:04,535][626795] Updated weights for policy 0, policy_version 283562 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:06,533][626795] Updated weights for policy 0, policy_version 283572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:08,440][626795] Updated weights for policy 0, policy_version 283582 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:08,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41096.7, 300 sec: 40960.0). Total num frames: 2323120128. Throughput: 0: 10266.9. Samples: 330778812. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:08,977][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:10,404][626795] Updated weights for policy 0, policy_version 283592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:12,503][626795] Updated weights for policy 0, policy_version 283602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:13,976][24592] Fps is (10 sec: 40139.5, 60 sec: 40959.8, 300 sec: 40960.0). Total num frames: 2323324928. Throughput: 0: 10257.3. Samples: 330809376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:13,976][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:14,510][626795] Updated weights for policy 0, policy_version 283612 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:16,423][626795] Updated weights for policy 0, policy_version 283622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:18,511][626795] Updated weights for policy 0, policy_version 283632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:18,976][24592] Fps is (10 sec: 40959.1, 60 sec: 40959.9, 300 sec: 40932.2). Total num frames: 2323529728. Throughput: 0: 10250.8. Samples: 330870720. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:18,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:20,534][626795] Updated weights for policy 0, policy_version 283642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:22,639][626795] Updated weights for policy 0, policy_version 283652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:23,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40959.7, 300 sec: 40959.9). Total num frames: 2323734528. Throughput: 0: 10202.0. Samples: 330931350. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:23,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:24,568][626795] Updated weights for policy 0, policy_version 283662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:26,559][626795] Updated weights for policy 0, policy_version 283672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:28,540][626795] Updated weights for policy 0, policy_version 283682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:28,975][24592] Fps is (10 sec: 40960.7, 60 sec: 41096.5, 300 sec: 40960.0). Total num frames: 2323939328. Throughput: 0: 10201.7. Samples: 330962100. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:28,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:30,549][626795] Updated weights for policy 0, policy_version 283692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:32,537][626795] Updated weights for policy 0, policy_version 283702 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:33,975][24592] Fps is (10 sec: 40961.9, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2324144128. Throughput: 0: 10213.3. Samples: 331023678. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:33,977][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:34,593][626795] Updated weights for policy 0, policy_version 283712 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:36,497][626795] Updated weights for policy 0, policy_version 283722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:38,530][626795] Updated weights for policy 0, policy_version 283732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:38,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40823.5, 300 sec: 40960.0). Total num frames: 2324348928. Throughput: 0: 10230.9. Samples: 331085670. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:38,976][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:40,460][626795] Updated weights for policy 0, policy_version 283742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:42,360][626795] Updated weights for policy 0, policy_version 283752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:43,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40823.5, 300 sec: 40960.0). Total num frames: 2324553728. Throughput: 0: 10246.4. Samples: 331116570. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:43,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:44,459][626795] Updated weights for policy 0, policy_version 283762 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:46,376][626795] Updated weights for policy 0, policy_version 283772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:48,371][626795] Updated weights for policy 0, policy_version 283782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:48,976][24592] Fps is (10 sec: 40957.5, 60 sec: 40823.4, 300 sec: 40960.0). Total num frames: 2324758528. Throughput: 0: 10245.9. Samples: 331178268. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:48,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:50,451][626795] Updated weights for policy 0, policy_version 283792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:52,479][626795] Updated weights for policy 0, policy_version 283802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:53,976][24592] Fps is (10 sec: 40957.4, 60 sec: 40960.2, 300 sec: 40932.1). Total num frames: 2324963328. Throughput: 0: 10230.0. Samples: 331239168. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:53,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:54,490][626795] Updated weights for policy 0, policy_version 283812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:56,610][626795] Updated weights for policy 0, policy_version 283822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 16:59:58,610][626795] Updated weights for policy 0, policy_version 283832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:58,975][24592] Fps is (10 sec: 40142.8, 60 sec: 40823.5, 300 sec: 40904.5). Total num frames: 2325159936. Throughput: 0: 10214.2. Samples: 331269012. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 16:59:58,977][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:00,652][626795] Updated weights for policy 0, policy_version 283842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:02,660][626795] Updated weights for policy 0, policy_version 283852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:03,976][24592] Fps is (10 sec: 40139.5, 60 sec: 40686.3, 300 sec: 40932.1). Total num frames: 2325364736. Throughput: 0: 10200.1. Samples: 331329732. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:03,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:04,713][626795] Updated weights for policy 0, policy_version 283862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:06,712][626795] Updated weights for policy 0, policy_version 283872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:08,697][626795] Updated weights for policy 0, policy_version 283882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:08,980][24592] Fps is (10 sec: 40941.5, 60 sec: 40820.3, 300 sec: 40931.7). Total num frames: 2325569536. Throughput: 0: 10214.4. Samples: 331391040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:08,981][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:10,651][626795] Updated weights for policy 0, policy_version 283892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:12,713][626795] Updated weights for policy 0, policy_version 283902 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:13,976][24592] Fps is (10 sec: 40960.1, 60 sec: 40823.0, 300 sec: 40959.9). Total num frames: 2325774336. Throughput: 0: 10212.5. Samples: 331421670. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:13,979][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:14,717][626795] Updated weights for policy 0, policy_version 283912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:16,569][626795] Updated weights for policy 0, policy_version 283922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:18,622][626795] Updated weights for policy 0, policy_version 283932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:18,975][24592] Fps is (10 sec: 40978.5, 60 sec: 40823.5, 300 sec: 40932.2). Total num frames: 2325979136. Throughput: 0: 10221.2. Samples: 331483632. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:18,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:20,662][626795] Updated weights for policy 0, policy_version 283942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:22,644][626795] Updated weights for policy 0, policy_version 283952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:23,976][24592] Fps is (10 sec: 40963.1, 60 sec: 40823.7, 300 sec: 40932.2). Total num frames: 2326183936. Throughput: 0: 10205.4. Samples: 331544916. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:23,977][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:24,781][626795] Updated weights for policy 0, policy_version 283962 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:26,701][626795] Updated weights for policy 0, policy_version 283972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:28,642][626795] Updated weights for policy 0, policy_version 283982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:28,976][24592] Fps is (10 sec: 40957.9, 60 sec: 40823.1, 300 sec: 40932.2). Total num frames: 2326388736. Throughput: 0: 10192.1. Samples: 331575222. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:28,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:30,714][626795] Updated weights for policy 0, policy_version 283992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:32,759][626795] Updated weights for policy 0, policy_version 284002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:33,976][24592] Fps is (10 sec: 40959.5, 60 sec: 40823.3, 300 sec: 40932.2). Total num frames: 2326593536. Throughput: 0: 10184.6. Samples: 331636572. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:33,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:34,664][626795] Updated weights for policy 0, policy_version 284012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:36,629][626795] Updated weights for policy 0, policy_version 284022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:38,636][626795] Updated weights for policy 0, policy_version 284032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:38,975][24592] Fps is (10 sec: 40962.5, 60 sec: 40823.5, 300 sec: 40960.0). Total num frames: 2326798336. Throughput: 0: 10200.4. Samples: 331698180. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:38,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:40,636][626795] Updated weights for policy 0, policy_version 284042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:42,608][626795] Updated weights for policy 0, policy_version 284052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:43,982][24592] Fps is (10 sec: 40934.5, 60 sec: 40819.0, 300 sec: 40959.1). Total num frames: 2327003136. Throughput: 0: 10221.9. Samples: 331729062. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:43,983][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:44,670][626795] Updated weights for policy 0, policy_version 284062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:46,502][626795] Updated weights for policy 0, policy_version 284072 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:48,559][626795] Updated weights for policy 0, policy_version 284082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:48,976][24592] Fps is (10 sec: 41777.6, 60 sec: 40960.1, 300 sec: 40987.7). Total num frames: 2327216128. Throughput: 0: 10248.9. Samples: 331790928. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:48,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:50,715][626795] Updated weights for policy 0, policy_version 284092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:52,565][626795] Updated weights for policy 0, policy_version 284102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:53,976][24592] Fps is (10 sec: 41805.3, 60 sec: 40960.2, 300 sec: 40987.7). Total num frames: 2327420928. Throughput: 0: 10250.9. Samples: 331852284. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:53,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:54,679][626795] Updated weights for policy 0, policy_version 284112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:56,657][626795] Updated weights for policy 0, policy_version 284122 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:00:58,682][626795] Updated weights for policy 0, policy_version 284132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:58,975][24592] Fps is (10 sec: 40142.2, 60 sec: 40960.0, 300 sec: 40932.9). Total num frames: 2327617536. Throughput: 0: 10238.3. Samples: 331882386. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:00:58,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:00,773][626795] Updated weights for policy 0, policy_version 284142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:02,837][626795] Updated weights for policy 0, policy_version 284152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:03,975][24592] Fps is (10 sec: 40141.3, 60 sec: 40960.6, 300 sec: 40932.2). Total num frames: 2327822336. Throughput: 0: 10203.5. Samples: 331942788. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:03,976][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000284158_2327822336.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:04,108][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000282958_2317991936.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:04,831][626795] Updated weights for policy 0, policy_version 284162 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:06,774][626795] Updated weights for policy 0, policy_version 284172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:08,797][626795] Updated weights for policy 0, policy_version 284182 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40963.2, 300 sec: 40960.0). Total num frames: 2328027136. Throughput: 0: 10205.6. Samples: 332004168. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:08,977][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:10,766][626795] Updated weights for policy 0, policy_version 284192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:12,707][626795] Updated weights for policy 0, policy_version 284202 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:13,975][24592] Fps is (10 sec: 40141.3, 60 sec: 40824.1, 300 sec: 40932.6). Total num frames: 2328223744. Throughput: 0: 10223.5. Samples: 332035272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:13,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:14,869][626795] Updated weights for policy 0, policy_version 284212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:16,641][626795] Updated weights for policy 0, policy_version 284222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:18,653][626795] Updated weights for policy 0, policy_version 284232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:18,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2328436736. Throughput: 0: 10236.3. Samples: 332097204. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:18,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:20,685][626795] Updated weights for policy 0, policy_version 284242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:22,645][626795] Updated weights for policy 0, policy_version 284252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:23,976][24592] Fps is (10 sec: 41777.3, 60 sec: 40959.8, 300 sec: 40932.2). Total num frames: 2328641536. Throughput: 0: 10259.2. Samples: 332159850. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:23,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:24,725][626795] Updated weights for policy 0, policy_version 284262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:26,533][626795] Updated weights for policy 0, policy_version 284272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:28,676][626795] Updated weights for policy 0, policy_version 284282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:28,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40960.3, 300 sec: 40932.2). Total num frames: 2328846336. Throughput: 0: 10251.1. Samples: 332190294. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:28,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:30,665][626795] Updated weights for policy 0, policy_version 284292 (0.0040)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:32,736][626795] Updated weights for policy 0, policy_version 284302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:33,975][24592] Fps is (10 sec: 40961.5, 60 sec: 40960.1, 300 sec: 40960.0). Total num frames: 2329051136. Throughput: 0: 10219.9. Samples: 332250822. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:33,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:34,687][626795] Updated weights for policy 0, policy_version 284312 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:36,816][626795] Updated weights for policy 0, policy_version 284322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:38,704][626795] Updated weights for policy 0, policy_version 284332 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:38,976][24592] Fps is (10 sec: 40960.5, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2329255936. Throughput: 0: 10218.3. Samples: 332312106. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:38,977][24592] Avg episode reward: [(0, '4.973')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:40,764][626795] Updated weights for policy 0, policy_version 284342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:42,675][626795] Updated weights for policy 0, policy_version 284352 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:43,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40964.4, 300 sec: 40960.0). Total num frames: 2329460736. Throughput: 0: 10227.5. Samples: 332342622. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:43,978][24592] Avg episode reward: [(0, '4.891')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:44,707][626795] Updated weights for policy 0, policy_version 284362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:46,692][626795] Updated weights for policy 0, policy_version 284372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:48,560][626795] Updated weights for policy 0, policy_version 284382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:48,976][24592] Fps is (10 sec: 41777.9, 60 sec: 40960.0, 300 sec: 40987.7). Total num frames: 2329673728. Throughput: 0: 10265.7. Samples: 332404746. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:48,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:50,709][626795] Updated weights for policy 0, policy_version 284392 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:52,623][626795] Updated weights for policy 0, policy_version 284402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40960.1, 300 sec: 40987.8). Total num frames: 2329878528. Throughput: 0: 10285.2. Samples: 332467002. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:53,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:54,653][626795] Updated weights for policy 0, policy_version 284412 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:56,674][626795] Updated weights for policy 0, policy_version 284422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:01:58,503][626795] Updated weights for policy 0, policy_version 284432 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:58,975][24592] Fps is (10 sec: 40142.4, 60 sec: 40960.1, 300 sec: 40960.0). Total num frames: 2330075136. Throughput: 0: 10282.9. Samples: 332498004. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:01:58,976][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:00,577][626795] Updated weights for policy 0, policy_version 284442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:02,595][626795] Updated weights for policy 0, policy_version 284452 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:03,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.6, 300 sec: 40987.8). Total num frames: 2330288128. Throughput: 0: 10263.9. Samples: 332559078. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:03,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:04,604][626795] Updated weights for policy 0, policy_version 284462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:06,711][626795] Updated weights for policy 0, policy_version 284472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:08,547][626795] Updated weights for policy 0, policy_version 284482 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:08,975][24592] Fps is (10 sec: 40959.3, 60 sec: 40959.9, 300 sec: 40987.8). Total num frames: 2330484736. Throughput: 0: 10243.3. Samples: 332620794. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:08,978][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:10,619][626795] Updated weights for policy 0, policy_version 284492 (0.0038)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:12,668][626795] Updated weights for policy 0, policy_version 284502 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:13,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41096.5, 300 sec: 40987.8). Total num frames: 2330689536. Throughput: 0: 10236.7. Samples: 332650944. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:13,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:14,577][626795] Updated weights for policy 0, policy_version 284512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:16,570][626795] Updated weights for policy 0, policy_version 284522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:18,596][626795] Updated weights for policy 0, policy_version 284532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:18,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41096.6, 300 sec: 41015.6). Total num frames: 2330902528. Throughput: 0: 10268.8. Samples: 332712918. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:18,976][24592] Avg episode reward: [(0, '4.967')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:20,540][626795] Updated weights for policy 0, policy_version 284542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:22,549][626795] Updated weights for policy 0, policy_version 284552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.8, 300 sec: 41015.8). Total num frames: 2331107328. Throughput: 0: 10300.1. Samples: 332775612. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:23,978][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:24,555][626795] Updated weights for policy 0, policy_version 284562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:26,445][626795] Updated weights for policy 0, policy_version 284572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:28,355][626795] Updated weights for policy 0, policy_version 284582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:28,985][24592] Fps is (10 sec: 40918.9, 60 sec: 41089.8, 300 sec: 40986.4). Total num frames: 2331312128. Throughput: 0: 10299.7. Samples: 332806212. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:28,986][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:30,407][626795] Updated weights for policy 0, policy_version 284592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:32,402][626795] Updated weights for policy 0, policy_version 284602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:33,977][24592] Fps is (10 sec: 41773.5, 60 sec: 41232.1, 300 sec: 41015.4). Total num frames: 2331525120. Throughput: 0: 10306.6. Samples: 332868552. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:33,980][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:34,439][626795] Updated weights for policy 0, policy_version 284612 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:36,458][626795] Updated weights for policy 0, policy_version 284622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:38,363][626795] Updated weights for policy 0, policy_version 284632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:38,975][24592] Fps is (10 sec: 41000.9, 60 sec: 41096.5, 300 sec: 40987.8). Total num frames: 2331721728. Throughput: 0: 10286.0. Samples: 332929872. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:38,976][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:40,408][626795] Updated weights for policy 0, policy_version 284642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:42,405][626795] Updated weights for policy 0, policy_version 284652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:43,975][24592] Fps is (10 sec: 40965.8, 60 sec: 41233.1, 300 sec: 41015.6). Total num frames: 2331934720. Throughput: 0: 10268.1. Samples: 332960070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:43,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:44,464][626795] Updated weights for policy 0, policy_version 284662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:46,294][626795] Updated weights for policy 0, policy_version 284672 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:48,360][626795] Updated weights for policy 0, policy_version 284682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:48,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41096.5, 300 sec: 41015.5). Total num frames: 2332139520. Throughput: 0: 10300.6. Samples: 333022608. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:48,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:50,436][626795] Updated weights for policy 0, policy_version 284692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:52,337][626795] Updated weights for policy 0, policy_version 284702 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:53,981][24592] Fps is (10 sec: 40939.8, 60 sec: 41093.2, 300 sec: 41014.9). Total num frames: 2332344320. Throughput: 0: 10294.8. Samples: 333084108. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:53,982][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:54,432][626795] Updated weights for policy 0, policy_version 284712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:56,333][626795] Updated weights for policy 0, policy_version 284722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:02:58,246][626795] Updated weights for policy 0, policy_version 284732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:58,975][24592] Fps is (10 sec: 40961.8, 60 sec: 41233.0, 300 sec: 40987.8). Total num frames: 2332549120. Throughput: 0: 10303.6. Samples: 333114606. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:02:58,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:00,326][626795] Updated weights for policy 0, policy_version 284742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:02,327][626795] Updated weights for policy 0, policy_version 284752 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:03,975][24592] Fps is (10 sec: 41799.8, 60 sec: 41233.1, 300 sec: 41043.3). Total num frames: 2332762112. Throughput: 0: 10310.5. Samples: 333176892. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:03,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000284761_2332762112.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:04,162][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000283560_2322923520.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:04,204][626795] Updated weights for policy 0, policy_version 284762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:06,222][626795] Updated weights for policy 0, policy_version 284772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:08,309][626795] Updated weights for policy 0, policy_version 284782 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:08,976][24592] Fps is (10 sec: 40957.8, 60 sec: 41232.8, 300 sec: 40987.7). Total num frames: 2332958720. Throughput: 0: 10269.5. Samples: 333237744. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:08,977][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:10,333][626795] Updated weights for policy 0, policy_version 284792 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:12,346][626795] Updated weights for policy 0, policy_version 284802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:13,975][24592] Fps is (10 sec: 40140.4, 60 sec: 41233.0, 300 sec: 40987.8). Total num frames: 2333163520. Throughput: 0: 10270.5. Samples: 333268284. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:13,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:14,415][626795] Updated weights for policy 0, policy_version 284812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:16,291][626795] Updated weights for policy 0, policy_version 284822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:18,203][626795] Updated weights for policy 0, policy_version 284832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:18,976][24592] Fps is (10 sec: 40961.8, 60 sec: 41096.5, 300 sec: 40987.8). Total num frames: 2333368320. Throughput: 0: 10256.7. Samples: 333330090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:18,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:20,381][626795] Updated weights for policy 0, policy_version 284842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:22,292][626795] Updated weights for policy 0, policy_version 284852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:23,976][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.5, 300 sec: 41015.5). Total num frames: 2333573120. Throughput: 0: 10272.9. Samples: 333392154. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:23,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:24,268][626795] Updated weights for policy 0, policy_version 284862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:26,146][626795] Updated weights for policy 0, policy_version 284872 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:28,125][626795] Updated weights for policy 0, policy_version 284882 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:28,978][24592] Fps is (10 sec: 40949.3, 60 sec: 41101.6, 300 sec: 40987.4). Total num frames: 2333777920. Throughput: 0: 10293.0. Samples: 333423282. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:28,979][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:30,183][626795] Updated weights for policy 0, policy_version 284892 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:32,115][626795] Updated weights for policy 0, policy_version 284902 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:33,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41097.5, 300 sec: 40987.8). Total num frames: 2333990912. Throughput: 0: 10285.8. Samples: 333485466. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:33,978][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:33,993][626795] Updated weights for policy 0, policy_version 284912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:36,066][626795] Updated weights for policy 0, policy_version 284922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:38,011][626795] Updated weights for policy 0, policy_version 284932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:38,976][24592] Fps is (10 sec: 41788.1, 60 sec: 41232.7, 300 sec: 40987.7). Total num frames: 2334195712. Throughput: 0: 10294.7. Samples: 333547326. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:38,979][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:40,154][626795] Updated weights for policy 0, policy_version 284942 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:42,119][626795] Updated weights for policy 0, policy_version 284952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:43,976][24592] Fps is (10 sec: 40959.1, 60 sec: 41096.4, 300 sec: 40987.8). Total num frames: 2334400512. Throughput: 0: 10279.8. Samples: 333577200. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:43,976][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:44,245][626795] Updated weights for policy 0, policy_version 284962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:46,090][626795] Updated weights for policy 0, policy_version 284972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:48,172][626795] Updated weights for policy 0, policy_version 284982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:48,976][24592] Fps is (10 sec: 40142.3, 60 sec: 40960.2, 300 sec: 40987.9). Total num frames: 2334597120. Throughput: 0: 10265.4. Samples: 333638838. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:48,977][24592] Avg episode reward: [(0, '4.401')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:50,079][626795] Updated weights for policy 0, policy_version 284992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:52,122][626795] Updated weights for policy 0, policy_version 285002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:53,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41099.9, 300 sec: 41015.6). Total num frames: 2334810112. Throughput: 0: 10291.0. Samples: 333700836. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:53,977][24592] Avg episode reward: [(0, '4.369')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:54,151][626795] Updated weights for policy 0, policy_version 285012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:56,090][626795] Updated weights for policy 0, policy_version 285022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:03:58,006][626795] Updated weights for policy 0, policy_version 285032 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:58,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41232.8, 300 sec: 41015.5). Total num frames: 2335023104. Throughput: 0: 10287.1. Samples: 333731208. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:03:58,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:00,038][626795] Updated weights for policy 0, policy_version 285042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:02,010][626795] Updated weights for policy 0, policy_version 285052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40959.9, 300 sec: 41015.5). Total num frames: 2335219712. Throughput: 0: 10305.5. Samples: 333793836. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:03,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:04,098][626795] Updated weights for policy 0, policy_version 285062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:06,023][626795] Updated weights for policy 0, policy_version 285072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:07,901][626795] Updated weights for policy 0, policy_version 285082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:08,975][24592] Fps is (10 sec: 40142.5, 60 sec: 41096.9, 300 sec: 41015.6). Total num frames: 2335424512. Throughput: 0: 10302.8. Samples: 333855780. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:08,978][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:10,052][626795] Updated weights for policy 0, policy_version 285092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:12,063][626795] Updated weights for policy 0, policy_version 285102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.5, 300 sec: 41015.6). Total num frames: 2335629312. Throughput: 0: 10275.5. Samples: 333885654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:13,976][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:14,051][626795] Updated weights for policy 0, policy_version 285112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:14,741][626772] Signal inference workers to stop experience collection... (4350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:14,742][626772] Signal inference workers to resume experience collection... (4350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:14,754][626795] InferenceWorker_p0-w0: stopping experience collection (4350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:14,755][626795] InferenceWorker_p0-w0: resuming experience collection (4350 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:16,127][626795] Updated weights for policy 0, policy_version 285122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:18,119][626795] Updated weights for policy 0, policy_version 285132 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:18,977][24592] Fps is (10 sec: 40132.8, 60 sec: 40958.7, 300 sec: 40987.6). Total num frames: 2335825920. Throughput: 0: 10240.3. Samples: 333946302. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:18,980][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:20,217][626795] Updated weights for policy 0, policy_version 285142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:22,083][626795] Updated weights for policy 0, policy_version 285152 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:23,976][24592] Fps is (10 sec: 40959.1, 60 sec: 41096.4, 300 sec: 41015.5). Total num frames: 2336038912. Throughput: 0: 10220.7. Samples: 334007256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:23,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:24,215][626795] Updated weights for policy 0, policy_version 285162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:26,224][626795] Updated weights for policy 0, policy_version 285172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:28,146][626795] Updated weights for policy 0, policy_version 285182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:28,975][24592] Fps is (10 sec: 41787.4, 60 sec: 41098.4, 300 sec: 41015.5). Total num frames: 2336243712. Throughput: 0: 10228.6. Samples: 334037484. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:28,976][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:30,194][626795] Updated weights for policy 0, policy_version 285192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:32,109][626795] Updated weights for policy 0, policy_version 285202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:33,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40959.7, 300 sec: 41015.5). Total num frames: 2336448512. Throughput: 0: 10256.6. Samples: 334100388. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:33,992][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:34,140][626795] Updated weights for policy 0, policy_version 285212 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:36,068][626795] Updated weights for policy 0, policy_version 285222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:38,054][626795] Updated weights for policy 0, policy_version 285232 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:38,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40960.4, 300 sec: 41015.5). Total num frames: 2336653312. Throughput: 0: 10234.7. Samples: 334161396. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:38,980][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:40,084][626795] Updated weights for policy 0, policy_version 285242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:42,070][626795] Updated weights for policy 0, policy_version 285252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:43,975][24592] Fps is (10 sec: 40961.7, 60 sec: 40960.1, 300 sec: 41015.6). Total num frames: 2336858112. Throughput: 0: 10254.1. Samples: 334192638. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:43,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:44,140][626795] Updated weights for policy 0, policy_version 285262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:46,210][626795] Updated weights for policy 0, policy_version 285272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:48,165][626795] Updated weights for policy 0, policy_version 285282 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:48,976][24592] Fps is (10 sec: 40139.9, 60 sec: 40960.0, 300 sec: 40987.8). Total num frames: 2337054720. Throughput: 0: 10206.5. Samples: 334253130. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:48,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:50,239][626795] Updated weights for policy 0, policy_version 285292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:52,223][626795] Updated weights for policy 0, policy_version 285302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:53,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40960.0, 300 sec: 41043.3). Total num frames: 2337267712. Throughput: 0: 10196.8. Samples: 334314636. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:53,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:54,187][626795] Updated weights for policy 0, policy_version 285312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:56,166][626795] Updated weights for policy 0, policy_version 285322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:04:58,068][626795] Updated weights for policy 0, policy_version 285332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:58,975][24592] Fps is (10 sec: 40961.0, 60 sec: 40687.2, 300 sec: 41015.7). Total num frames: 2337464320. Throughput: 0: 10205.0. Samples: 334344876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:04:58,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:00,084][626795] Updated weights for policy 0, policy_version 285342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:02,145][626795] Updated weights for policy 0, policy_version 285352 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:03,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40960.0, 300 sec: 41043.9). Total num frames: 2337677312. Throughput: 0: 10242.0. Samples: 334407174. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:03,976][24592] Avg episode reward: [(0, '4.885')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000285361_2337677312.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000284158_2327822336.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:04,193][626795] Updated weights for policy 0, policy_version 285362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:06,149][626795] Updated weights for policy 0, policy_version 285372 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:08,125][626795] Updated weights for policy 0, policy_version 285382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:08,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40960.0, 300 sec: 41043.4). Total num frames: 2337882112. Throughput: 0: 10244.9. Samples: 334468272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:08,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:10,016][626795] Updated weights for policy 0, policy_version 285392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:12,098][626795] Updated weights for policy 0, policy_version 285402 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40960.0, 300 sec: 41043.3). Total num frames: 2338086912. Throughput: 0: 10255.7. Samples: 334498992. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:13,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:14,189][626795] Updated weights for policy 0, policy_version 285412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:16,083][626795] Updated weights for policy 0, policy_version 285422 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:18,173][626795] Updated weights for policy 0, policy_version 285432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:18,975][24592] Fps is (10 sec: 40140.5, 60 sec: 40961.3, 300 sec: 41015.6). Total num frames: 2338283520. Throughput: 0: 10230.8. Samples: 334560768. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:18,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:20,162][626795] Updated weights for policy 0, policy_version 285442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:22,200][626795] Updated weights for policy 0, policy_version 285452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:23,976][24592] Fps is (10 sec: 40958.5, 60 sec: 40959.9, 300 sec: 41043.3). Total num frames: 2338496512. Throughput: 0: 10220.8. Samples: 334621338. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:23,978][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:24,210][626795] Updated weights for policy 0, policy_version 285462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:26,300][626795] Updated weights for policy 0, policy_version 285472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:28,169][626795] Updated weights for policy 0, policy_version 285482 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:28,975][24592] Fps is (10 sec: 41779.5, 60 sec: 40960.0, 300 sec: 41043.3). Total num frames: 2338701312. Throughput: 0: 10194.4. Samples: 334651386. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:28,977][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:30,193][626795] Updated weights for policy 0, policy_version 285492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:32,195][626795] Updated weights for policy 0, policy_version 285502 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:33,975][24592] Fps is (10 sec: 40961.7, 60 sec: 40960.3, 300 sec: 41043.3). Total num frames: 2338906112. Throughput: 0: 10239.1. Samples: 334713888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:33,976][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:34,237][626795] Updated weights for policy 0, policy_version 285512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:36,217][626795] Updated weights for policy 0, policy_version 285522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:38,057][626795] Updated weights for policy 0, policy_version 285532 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:38,980][24592] Fps is (10 sec: 40939.0, 60 sec: 40956.5, 300 sec: 41043.5). Total num frames: 2339110912. Throughput: 0: 10244.6. Samples: 334775694. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:38,981][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:40,157][626795] Updated weights for policy 0, policy_version 285542 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:42,109][626795] Updated weights for policy 0, policy_version 285552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:43,916][626795] Updated weights for policy 0, policy_version 285562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:43,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41096.4, 300 sec: 41043.3). Total num frames: 2339323904. Throughput: 0: 10262.5. Samples: 334806690. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:43,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:46,033][626795] Updated weights for policy 0, policy_version 285572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:48,060][626795] Updated weights for policy 0, policy_version 285582 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:48,975][24592] Fps is (10 sec: 40980.6, 60 sec: 41096.6, 300 sec: 41015.6). Total num frames: 2339520512. Throughput: 0: 10255.6. Samples: 334868676. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:48,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:50,156][626795] Updated weights for policy 0, policy_version 285592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:52,164][626795] Updated weights for policy 0, policy_version 285602 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:53,976][24592] Fps is (10 sec: 40138.9, 60 sec: 40959.5, 300 sec: 41043.2). Total num frames: 2339725312. Throughput: 0: 10251.2. Samples: 334929582. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:53,979][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:54,087][626795] Updated weights for policy 0, policy_version 285612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:56,161][626795] Updated weights for policy 0, policy_version 285622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:05:58,141][626795] Updated weights for policy 0, policy_version 285632 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:58,976][24592] Fps is (10 sec: 40957.1, 60 sec: 41096.0, 300 sec: 41043.2). Total num frames: 2339930112. Throughput: 0: 10239.8. Samples: 334959792. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:05:58,978][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:00,200][626795] Updated weights for policy 0, policy_version 285642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:02,128][626795] Updated weights for policy 0, policy_version 285652 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:03,976][24592] Fps is (10 sec: 40962.1, 60 sec: 40959.9, 300 sec: 41043.3). Total num frames: 2340134912. Throughput: 0: 10218.8. Samples: 335020614. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:03,978][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:04,079][626795] Updated weights for policy 0, policy_version 285662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:06,148][626795] Updated weights for policy 0, policy_version 285672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:08,189][626795] Updated weights for policy 0, policy_version 285682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:08,975][24592] Fps is (10 sec: 40144.2, 60 sec: 40823.5, 300 sec: 41043.3). Total num frames: 2340331520. Throughput: 0: 10252.2. Samples: 335082684. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:08,978][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:10,118][626795] Updated weights for policy 0, policy_version 285692 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:12,102][626795] Updated weights for policy 0, policy_version 285702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:13,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40960.1, 300 sec: 41043.3). Total num frames: 2340544512. Throughput: 0: 10270.8. Samples: 335113572. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:13,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:14,134][626795] Updated weights for policy 0, policy_version 285712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:16,059][626795] Updated weights for policy 0, policy_version 285722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:17,994][626795] Updated weights for policy 0, policy_version 285732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:18,976][24592] Fps is (10 sec: 41776.6, 60 sec: 41096.2, 300 sec: 41043.3). Total num frames: 2340749312. Throughput: 0: 10264.4. Samples: 335175792. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:18,978][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:20,192][626795] Updated weights for policy 0, policy_version 285742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:22,129][626795] Updated weights for policy 0, policy_version 285752 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:23,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40960.3, 300 sec: 41043.3). Total num frames: 2340954112. Throughput: 0: 10220.5. Samples: 335235564. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:23,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:24,264][626795] Updated weights for policy 0, policy_version 285762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:26,256][626795] Updated weights for policy 0, policy_version 285772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:28,272][626795] Updated weights for policy 0, policy_version 285782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:28,975][24592] Fps is (10 sec: 40142.9, 60 sec: 40823.4, 300 sec: 41015.5). Total num frames: 2341150720. Throughput: 0: 10205.9. Samples: 335265954. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:28,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:30,209][626795] Updated weights for policy 0, policy_version 285792 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:32,282][626795] Updated weights for policy 0, policy_version 285802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40960.1, 300 sec: 41043.3). Total num frames: 2341363712. Throughput: 0: 10190.4. Samples: 335327244. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:33,976][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:34,238][626795] Updated weights for policy 0, policy_version 285812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:36,178][626795] Updated weights for policy 0, policy_version 285822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:38,250][626795] Updated weights for policy 0, policy_version 285832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40963.5, 300 sec: 41043.3). Total num frames: 2341568512. Throughput: 0: 10219.5. Samples: 335389452. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:38,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:40,133][626795] Updated weights for policy 0, policy_version 285842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:42,164][626795] Updated weights for policy 0, policy_version 285852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:43,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40823.6, 300 sec: 41015.6). Total num frames: 2341773312. Throughput: 0: 10240.4. Samples: 335420604. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:43,977][24592] Avg episode reward: [(0, '4.414')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:44,142][626795] Updated weights for policy 0, policy_version 285862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:46,036][626795] Updated weights for policy 0, policy_version 285872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:48,034][626795] Updated weights for policy 0, policy_version 285882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41096.6, 300 sec: 41043.3). Total num frames: 2341986304. Throughput: 0: 10267.8. Samples: 335482662. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:48,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:50,042][626795] Updated weights for policy 0, policy_version 285892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:52,066][626795] Updated weights for policy 0, policy_version 285902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:53,976][24592] Fps is (10 sec: 40959.3, 60 sec: 40960.3, 300 sec: 41043.3). Total num frames: 2342182912. Throughput: 0: 10254.2. Samples: 335544126. Policy #0 lag: (min: 0.0, avg: 2.0, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:53,977][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:54,066][626795] Updated weights for policy 0, policy_version 285912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:56,187][626795] Updated weights for policy 0, policy_version 285922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:06:58,129][626795] Updated weights for policy 0, policy_version 285932 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:58,976][24592] Fps is (10 sec: 40139.9, 60 sec: 40960.4, 300 sec: 41015.5). Total num frames: 2342387712. Throughput: 0: 10232.1. Samples: 335574018. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:06:58,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:00,024][626795] Updated weights for policy 0, policy_version 285942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:02,128][626795] Updated weights for policy 0, policy_version 285952 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:03,976][24592] Fps is (10 sec: 40956.3, 60 sec: 40959.4, 300 sec: 41043.2). Total num frames: 2342592512. Throughput: 0: 10221.2. Samples: 335635752. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:03,979][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:04,006][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000285962_2342600704.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:04,021][626795] Updated weights for policy 0, policy_version 285962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:04,115][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000284761_2332762112.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:06,186][626795] Updated weights for policy 0, policy_version 285972 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:08,231][626795] Updated weights for policy 0, policy_version 285982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:08,975][24592] Fps is (10 sec: 40960.8, 60 sec: 41096.5, 300 sec: 41043.3). Total num frames: 2342797312. Throughput: 0: 10254.8. Samples: 335697030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:08,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:10,105][626795] Updated weights for policy 0, policy_version 285992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:12,151][626795] Updated weights for policy 0, policy_version 286002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:13,975][24592] Fps is (10 sec: 40964.4, 60 sec: 40960.0, 300 sec: 41015.5). Total num frames: 2343002112. Throughput: 0: 10266.0. Samples: 335727924. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:13,977][24592] Avg episode reward: [(0, '5.024')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:14,204][626795] Updated weights for policy 0, policy_version 286012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:16,090][626795] Updated weights for policy 0, policy_version 286022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:18,123][626795] Updated weights for policy 0, policy_version 286032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:18,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40960.4, 300 sec: 41015.5). Total num frames: 2343206912. Throughput: 0: 10266.1. Samples: 335789220. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:18,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:20,147][626795] Updated weights for policy 0, policy_version 286042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:22,066][626795] Updated weights for policy 0, policy_version 286052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:23,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40959.9, 300 sec: 41016.9). Total num frames: 2343411712. Throughput: 0: 10260.9. Samples: 335851194. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:23,978][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:24,191][626795] Updated weights for policy 0, policy_version 286062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:26,228][626795] Updated weights for policy 0, policy_version 286072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:28,080][626795] Updated weights for policy 0, policy_version 286082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41096.6, 300 sec: 40988.0). Total num frames: 2343616512. Throughput: 0: 10240.4. Samples: 335881422. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:28,976][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:30,154][626795] Updated weights for policy 0, policy_version 286092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:32,213][626795] Updated weights for policy 0, policy_version 286102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:33,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40960.0, 300 sec: 41015.5). Total num frames: 2343821312. Throughput: 0: 10225.6. Samples: 335942814. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:33,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:34,182][626795] Updated weights for policy 0, policy_version 286112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:36,035][626795] Updated weights for policy 0, policy_version 286122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:38,150][626795] Updated weights for policy 0, policy_version 286132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:38,976][24592] Fps is (10 sec: 40958.6, 60 sec: 40959.8, 300 sec: 40987.7). Total num frames: 2344026112. Throughput: 0: 10245.3. Samples: 336005166. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:38,976][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:40,049][626795] Updated weights for policy 0, policy_version 286142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:42,057][626795] Updated weights for policy 0, policy_version 286152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:43,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40959.9, 300 sec: 40987.8). Total num frames: 2344230912. Throughput: 0: 10274.9. Samples: 336036390. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:43,978][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:43,991][626795] Updated weights for policy 0, policy_version 286162 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:45,939][626795] Updated weights for policy 0, policy_version 286172 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:47,970][626795] Updated weights for policy 0, policy_version 286182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:48,975][24592] Fps is (10 sec: 41780.8, 60 sec: 40960.0, 300 sec: 41016.2). Total num frames: 2344443904. Throughput: 0: 10289.5. Samples: 336098766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:48,977][24592] Avg episode reward: [(0, '4.408')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:49,886][626795] Updated weights for policy 0, policy_version 286192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:51,937][626795] Updated weights for policy 0, policy_version 286202 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:53,719][626795] Updated weights for policy 0, policy_version 286212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:53,977][24592] Fps is (10 sec: 41775.3, 60 sec: 41095.9, 300 sec: 41015.4). Total num frames: 2344648704. Throughput: 0: 10317.9. Samples: 336161346. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:53,978][24592] Avg episode reward: [(0, '4.179')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:55,809][626795] Updated weights for policy 0, policy_version 286222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:57,881][626795] Updated weights for policy 0, policy_version 286232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:58,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41096.5, 300 sec: 40987.7). Total num frames: 2344853504. Throughput: 0: 10291.1. Samples: 336191028. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:07:58,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:07:59,928][626795] Updated weights for policy 0, policy_version 286242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:02,165][626795] Updated weights for policy 0, policy_version 286252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:03,975][24592] Fps is (10 sec: 40145.4, 60 sec: 40960.7, 300 sec: 40987.8). Total num frames: 2345050112. Throughput: 0: 10262.8. Samples: 336251046. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:03,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:03,986][626795] Updated weights for policy 0, policy_version 286262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:06,032][626795] Updated weights for policy 0, policy_version 286272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:07,970][626795] Updated weights for policy 0, policy_version 286282 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:08,975][24592] Fps is (10 sec: 40141.9, 60 sec: 40960.0, 300 sec: 40987.8). Total num frames: 2345254912. Throughput: 0: 10261.1. Samples: 336312942. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:08,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:09,923][626795] Updated weights for policy 0, policy_version 286292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:11,952][626795] Updated weights for policy 0, policy_version 286302 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:13,867][626795] Updated weights for policy 0, policy_version 286312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:13,976][24592] Fps is (10 sec: 41778.1, 60 sec: 41096.4, 300 sec: 41015.5). Total num frames: 2345467904. Throughput: 0: 10286.6. Samples: 336344322. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:13,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:15,887][626795] Updated weights for policy 0, policy_version 286322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:17,864][626795] Updated weights for policy 0, policy_version 286332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:18,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41233.1, 300 sec: 41043.3). Total num frames: 2345680896. Throughput: 0: 10308.5. Samples: 336406698. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:18,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:19,754][626795] Updated weights for policy 0, policy_version 286342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:21,761][626795] Updated weights for policy 0, policy_version 286352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:23,659][626795] Updated weights for policy 0, policy_version 286362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:23,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41369.4, 300 sec: 41071.4). Total num frames: 2345893888. Throughput: 0: 10338.4. Samples: 336470394. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:23,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:25,655][626795] Updated weights for policy 0, policy_version 286372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:27,577][626795] Updated weights for policy 0, policy_version 286382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:28,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41369.3, 300 sec: 41043.3). Total num frames: 2346098688. Throughput: 0: 10325.3. Samples: 336501030. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:28,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:29,697][626795] Updated weights for policy 0, policy_version 286392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:31,509][626795] Updated weights for policy 0, policy_version 286402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:33,532][626795] Updated weights for policy 0, policy_version 286412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:33,976][24592] Fps is (10 sec: 40961.1, 60 sec: 41369.5, 300 sec: 41043.4). Total num frames: 2346303488. Throughput: 0: 10314.1. Samples: 336562902. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:33,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:35,630][626795] Updated weights for policy 0, policy_version 286422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:37,648][626795] Updated weights for policy 0, policy_version 286432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:38,975][24592] Fps is (10 sec: 40961.6, 60 sec: 41369.8, 300 sec: 41043.3). Total num frames: 2346508288. Throughput: 0: 10308.7. Samples: 336625224. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:38,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:39,512][626795] Updated weights for policy 0, policy_version 286442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:41,427][626795] Updated weights for policy 0, policy_version 286452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:43,417][626795] Updated weights for policy 0, policy_version 286462 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:43,975][24592] Fps is (10 sec: 40961.0, 60 sec: 41369.8, 300 sec: 41071.1). Total num frames: 2346713088. Throughput: 0: 10346.3. Samples: 336656610. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:45,430][626795] Updated weights for policy 0, policy_version 286472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:47,259][626795] Updated weights for policy 0, policy_version 286482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:48,976][24592] Fps is (10 sec: 41776.9, 60 sec: 41369.2, 300 sec: 41071.0). Total num frames: 2346926080. Throughput: 0: 10402.8. Samples: 336719178. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:48,978][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:49,347][626795] Updated weights for policy 0, policy_version 286492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:51,190][626795] Updated weights for policy 0, policy_version 286502 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:53,155][626795] Updated weights for policy 0, policy_version 286512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:53,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41506.9, 300 sec: 41071.1). Total num frames: 2347139072. Throughput: 0: 10426.9. Samples: 336782154. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:53,977][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:55,215][626795] Updated weights for policy 0, policy_version 286522 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:57,183][626795] Updated weights for policy 0, policy_version 286532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:08:58,957][626795] Updated weights for policy 0, policy_version 286542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:58,986][24592] Fps is (10 sec: 42557.8, 60 sec: 41635.8, 300 sec: 41125.2). Total num frames: 2347352064. Throughput: 0: 10422.3. Samples: 336813426. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:08:58,987][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:01,100][626795] Updated weights for policy 0, policy_version 286552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:03,189][626795] Updated weights for policy 0, policy_version 286562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:03,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41642.5, 300 sec: 41098.8). Total num frames: 2347548672. Throughput: 0: 10403.4. Samples: 336874854. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:03,978][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000286566_2347548672.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:04,056][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000285361_2337677312.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:05,101][626795] Updated weights for policy 0, policy_version 286572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:07,075][626795] Updated weights for policy 0, policy_version 286582 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:08,977][24592] Fps is (10 sec: 40175.4, 60 sec: 41641.6, 300 sec: 41098.7). Total num frames: 2347753472. Throughput: 0: 10362.4. Samples: 336936714. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:08,981][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:09,003][626795] Updated weights for policy 0, policy_version 286592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:11,102][626795] Updated weights for policy 0, policy_version 286602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:13,026][626795] Updated weights for policy 0, policy_version 286612 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:13,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41506.1, 300 sec: 41126.8). Total num frames: 2347958272. Throughput: 0: 10363.1. Samples: 336967368. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:13,978][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:15,046][626795] Updated weights for policy 0, policy_version 286622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:16,955][626795] Updated weights for policy 0, policy_version 286632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:18,845][626795] Updated weights for policy 0, policy_version 286642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:18,975][24592] Fps is (10 sec: 41785.5, 60 sec: 41506.1, 300 sec: 41126.7). Total num frames: 2348171264. Throughput: 0: 10384.3. Samples: 337030194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:18,976][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:20,815][626795] Updated weights for policy 0, policy_version 286652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:22,820][626795] Updated weights for policy 0, policy_version 286662 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:23,975][24592] Fps is (10 sec: 42599.9, 60 sec: 41506.5, 300 sec: 41154.4). Total num frames: 2348384256. Throughput: 0: 10417.6. Samples: 337094016. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:23,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:24,768][626795] Updated weights for policy 0, policy_version 286672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:26,721][626795] Updated weights for policy 0, policy_version 286682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:28,638][626795] Updated weights for policy 0, policy_version 286692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:28,979][24592] Fps is (10 sec: 41763.4, 60 sec: 41503.8, 300 sec: 41153.9). Total num frames: 2348589056. Throughput: 0: 10399.7. Samples: 337124634. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:28,988][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:30,581][626795] Updated weights for policy 0, policy_version 286702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:32,564][626795] Updated weights for policy 0, policy_version 286712 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:33,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41506.2, 300 sec: 41154.4). Total num frames: 2348793856. Throughput: 0: 10404.9. Samples: 337187394. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:33,976][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:34,574][626795] Updated weights for policy 0, policy_version 286722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:36,615][626795] Updated weights for policy 0, policy_version 286732 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:38,519][626795] Updated weights for policy 0, policy_version 286742 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:38,975][24592] Fps is (10 sec: 41794.8, 60 sec: 41642.6, 300 sec: 41182.2). Total num frames: 2349006848. Throughput: 0: 10390.0. Samples: 337249704. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:38,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:40,581][626795] Updated weights for policy 0, policy_version 286752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:42,472][626795] Updated weights for policy 0, policy_version 286762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.6, 300 sec: 41210.0). Total num frames: 2349211648. Throughput: 0: 10375.4. Samples: 337280214. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:43,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:44,483][626795] Updated weights for policy 0, policy_version 286772 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:46,388][626795] Updated weights for policy 0, policy_version 286782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:48,257][626795] Updated weights for policy 0, policy_version 286792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:48,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41643.1, 300 sec: 41209.9). Total num frames: 2349424640. Throughput: 0: 10421.7. Samples: 337343826. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:48,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:50,357][626795] Updated weights for policy 0, policy_version 286802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:52,271][626795] Updated weights for policy 0, policy_version 286812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:53,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41642.8, 300 sec: 41265.5). Total num frames: 2349637632. Throughput: 0: 10447.2. Samples: 337406820. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:53,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:54,196][626795] Updated weights for policy 0, policy_version 286822 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:56,116][626795] Updated weights for policy 0, policy_version 286832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:09:58,117][626795] Updated weights for policy 0, policy_version 286842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:58,976][24592] Fps is (10 sec: 41776.7, 60 sec: 41512.7, 300 sec: 41237.6). Total num frames: 2349842432. Throughput: 0: 10451.3. Samples: 337437678. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:09:58,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:00,085][626795] Updated weights for policy 0, policy_version 286852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:02,002][626795] Updated weights for policy 0, policy_version 286862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:03,909][626795] Updated weights for policy 0, policy_version 286872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:03,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41779.3, 300 sec: 41265.4). Total num frames: 2350055424. Throughput: 0: 10463.6. Samples: 337501056. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:03,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:05,919][626795] Updated weights for policy 0, policy_version 286882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:07,920][626795] Updated weights for policy 0, policy_version 286892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:08,975][24592] Fps is (10 sec: 41781.2, 60 sec: 41780.2, 300 sec: 41265.5). Total num frames: 2350260224. Throughput: 0: 10414.8. Samples: 337562682. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:08,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:09,987][626795] Updated weights for policy 0, policy_version 286902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:11,960][626795] Updated weights for policy 0, policy_version 286912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:13,827][626795] Updated weights for policy 0, policy_version 286922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:13,976][24592] Fps is (10 sec: 40958.2, 60 sec: 41779.0, 300 sec: 41293.2). Total num frames: 2350465024. Throughput: 0: 10418.5. Samples: 337593432. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:13,979][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:15,930][626795] Updated weights for policy 0, policy_version 286932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:17,830][626795] Updated weights for policy 0, policy_version 286942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:18,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41642.7, 300 sec: 41265.5). Total num frames: 2350669824. Throughput: 0: 10411.8. Samples: 337655922. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:18,977][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:19,884][626795] Updated weights for policy 0, policy_version 286952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:21,766][626795] Updated weights for policy 0, policy_version 286962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:23,799][626795] Updated weights for policy 0, policy_version 286972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:23,975][24592] Fps is (10 sec: 41781.8, 60 sec: 41642.7, 300 sec: 41293.2). Total num frames: 2350882816. Throughput: 0: 10415.7. Samples: 337718412. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:23,977][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:25,592][626795] Updated weights for policy 0, policy_version 286982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:27,623][626795] Updated weights for policy 0, policy_version 286992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:28,975][24592] Fps is (10 sec: 42597.8, 60 sec: 41781.7, 300 sec: 41321.0). Total num frames: 2351095808. Throughput: 0: 10443.4. Samples: 337750170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:28,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:29,517][626795] Updated weights for policy 0, policy_version 287002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:31,526][626795] Updated weights for policy 0, policy_version 287012 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:33,432][626795] Updated weights for policy 0, policy_version 287022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:33,976][24592] Fps is (10 sec: 41774.8, 60 sec: 41778.6, 300 sec: 41321.6). Total num frames: 2351300608. Throughput: 0: 10441.2. Samples: 337813692. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:33,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:35,400][626795] Updated weights for policy 0, policy_version 287032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:37,432][626795] Updated weights for policy 0, policy_version 287042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.6, 300 sec: 41293.2). Total num frames: 2351505408. Throughput: 0: 10420.5. Samples: 337875744. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:38,976][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:39,386][626795] Updated weights for policy 0, policy_version 287052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:41,390][626795] Updated weights for policy 0, policy_version 287062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:41,649][626772] Signal inference workers to stop experience collection... (4400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:41,650][626772] Signal inference workers to resume experience collection... (4400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:41,658][626795] InferenceWorker_p0-w0: stopping experience collection (4400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:41,663][626795] InferenceWorker_p0-w0: resuming experience collection (4400 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:43,359][626795] Updated weights for policy 0, policy_version 287072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:43,976][24592] Fps is (10 sec: 41779.6, 60 sec: 41778.6, 300 sec: 41348.7). Total num frames: 2351718400. Throughput: 0: 10425.4. Samples: 337906824. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:43,977][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:45,363][626795] Updated weights for policy 0, policy_version 287082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:47,245][626795] Updated weights for policy 0, policy_version 287092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.6, 300 sec: 41348.9). Total num frames: 2351923200. Throughput: 0: 10395.1. Samples: 337968834. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:48,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:49,393][626795] Updated weights for policy 0, policy_version 287102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:51,411][626795] Updated weights for policy 0, policy_version 287112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:53,295][626795] Updated weights for policy 0, policy_version 287122 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:53,976][24592] Fps is (10 sec: 40960.1, 60 sec: 41505.5, 300 sec: 41348.8). Total num frames: 2352128000. Throughput: 0: 10376.5. Samples: 338029632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:53,978][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:55,300][626795] Updated weights for policy 0, policy_version 287132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:57,208][626795] Updated weights for policy 0, policy_version 287142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:58,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41643.1, 300 sec: 41376.6). Total num frames: 2352340992. Throughput: 0: 10396.0. Samples: 338061246. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:10:58,978][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:10:59,285][626795] Updated weights for policy 0, policy_version 287152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:01,152][626795] Updated weights for policy 0, policy_version 287162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:02,970][626795] Updated weights for policy 0, policy_version 287172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:03,975][24592] Fps is (10 sec: 41782.8, 60 sec: 41506.2, 300 sec: 41404.3). Total num frames: 2352545792. Throughput: 0: 10424.4. Samples: 338125020. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:03,976][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000287176_2352545792.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:04,048][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000285962_2342600704.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:05,100][626795] Updated weights for policy 0, policy_version 287182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:06,978][626795] Updated weights for policy 0, policy_version 287192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:08,927][626795] Updated weights for policy 0, policy_version 287202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:08,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41642.6, 300 sec: 41404.3). Total num frames: 2352758784. Throughput: 0: 10421.8. Samples: 338187396. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:08,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:11,099][626795] Updated weights for policy 0, policy_version 287212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:13,100][626795] Updated weights for policy 0, policy_version 287222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:13,977][24592] Fps is (10 sec: 40955.0, 60 sec: 41505.7, 300 sec: 41376.4). Total num frames: 2352955392. Throughput: 0: 10381.2. Samples: 338217336. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:13,978][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:15,251][626795] Updated weights for policy 0, policy_version 287232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:17,137][626795] Updated weights for policy 0, policy_version 287242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:18,975][24592] Fps is (10 sec: 40141.6, 60 sec: 41506.1, 300 sec: 41376.5). Total num frames: 2353160192. Throughput: 0: 10311.7. Samples: 338277708. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:18,976][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:18,995][626795] Updated weights for policy 0, policy_version 287252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:21,060][626795] Updated weights for policy 0, policy_version 287262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:23,077][626795] Updated weights for policy 0, policy_version 287272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:23,975][24592] Fps is (10 sec: 40965.2, 60 sec: 41369.6, 300 sec: 41404.3). Total num frames: 2353364992. Throughput: 0: 10318.6. Samples: 338340078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:23,978][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:24,944][626795] Updated weights for policy 0, policy_version 287282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:27,026][626795] Updated weights for policy 0, policy_version 287292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:28,865][626795] Updated weights for policy 0, policy_version 287302 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:28,978][24592] Fps is (10 sec: 41769.1, 60 sec: 41368.0, 300 sec: 41404.0). Total num frames: 2353577984. Throughput: 0: 10330.5. Samples: 338371710. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:28,978][24592] Avg episode reward: [(0, '4.414')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:30,885][626795] Updated weights for policy 0, policy_version 287312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:32,741][626795] Updated weights for policy 0, policy_version 287322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:33,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41506.9, 300 sec: 41432.1). Total num frames: 2353790976. Throughput: 0: 10360.4. Samples: 338435052. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:33,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:34,693][626795] Updated weights for policy 0, policy_version 287332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:36,610][626795] Updated weights for policy 0, policy_version 287342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:38,598][626795] Updated weights for policy 0, policy_version 287352 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:38,975][24592] Fps is (10 sec: 42608.9, 60 sec: 41642.8, 300 sec: 41459.9). Total num frames: 2354003968. Throughput: 0: 10416.9. Samples: 338498382. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:38,976][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:40,555][626795] Updated weights for policy 0, policy_version 287362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:42,658][626795] Updated weights for policy 0, policy_version 287372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:43,975][24592] Fps is (10 sec: 40959.3, 60 sec: 41370.1, 300 sec: 41404.3). Total num frames: 2354200576. Throughput: 0: 10389.4. Samples: 338528772. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:43,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:44,577][626795] Updated weights for policy 0, policy_version 287382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:46,620][626795] Updated weights for policy 0, policy_version 287392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:48,532][626795] Updated weights for policy 0, policy_version 287402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:48,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41506.2, 300 sec: 41459.9). Total num frames: 2354413568. Throughput: 0: 10344.9. Samples: 338590542. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:48,977][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:50,548][626795] Updated weights for policy 0, policy_version 287412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:52,593][626795] Updated weights for policy 0, policy_version 287422 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:53,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41506.7, 300 sec: 41459.9). Total num frames: 2354618368. Throughput: 0: 10350.7. Samples: 338653176. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:53,978][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:54,545][626795] Updated weights for policy 0, policy_version 287432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:56,462][626795] Updated weights for policy 0, policy_version 287442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:11:58,371][626795] Updated weights for policy 0, policy_version 287452 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41506.1, 300 sec: 41487.8). Total num frames: 2354831360. Throughput: 0: 10372.7. Samples: 338684094. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:11:58,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:00,357][626795] Updated weights for policy 0, policy_version 287462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:02,267][626795] Updated weights for policy 0, policy_version 287472 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:03,976][24592] Fps is (10 sec: 41776.2, 60 sec: 41505.6, 300 sec: 41487.5). Total num frames: 2355036160. Throughput: 0: 10440.1. Samples: 338747520. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:03,978][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:04,192][626795] Updated weights for policy 0, policy_version 287482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:06,104][626795] Updated weights for policy 0, policy_version 287492 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:07,964][626795] Updated weights for policy 0, policy_version 287502 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:08,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41642.8, 300 sec: 41543.2). Total num frames: 2355257344. Throughput: 0: 10486.3. Samples: 338811960. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:08,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:10,010][626795] Updated weights for policy 0, policy_version 287512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:11,760][626795] Updated weights for policy 0, policy_version 287522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:13,889][626795] Updated weights for policy 0, policy_version 287532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:13,975][24592] Fps is (10 sec: 43421.3, 60 sec: 41916.7, 300 sec: 41571.0). Total num frames: 2355470336. Throughput: 0: 10481.4. Samples: 338843346. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:13,977][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:15,956][626795] Updated weights for policy 0, policy_version 287542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:17,769][626795] Updated weights for policy 0, policy_version 287552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:18,976][24592] Fps is (10 sec: 40957.8, 60 sec: 41778.9, 300 sec: 41543.1). Total num frames: 2355666944. Throughput: 0: 10454.4. Samples: 338905506. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:18,978][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:19,891][626795] Updated weights for policy 0, policy_version 287562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:21,895][626795] Updated weights for policy 0, policy_version 287572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:23,616][626795] Updated weights for policy 0, policy_version 287582 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:23,975][24592] Fps is (10 sec: 40140.3, 60 sec: 41779.2, 300 sec: 41543.2). Total num frames: 2355871744. Throughput: 0: 10429.5. Samples: 338967708. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:23,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:25,772][626795] Updated weights for policy 0, policy_version 287592 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:27,732][626795] Updated weights for policy 0, policy_version 287602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:28,976][24592] Fps is (10 sec: 41779.9, 60 sec: 41780.7, 300 sec: 41570.9). Total num frames: 2356084736. Throughput: 0: 10446.4. Samples: 338998860. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:28,979][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:29,591][626795] Updated weights for policy 0, policy_version 287612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:31,597][626795] Updated weights for policy 0, policy_version 287622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:33,438][626795] Updated weights for policy 0, policy_version 287632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41598.8). Total num frames: 2356297728. Throughput: 0: 10468.0. Samples: 339061602. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:33,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:35,480][626795] Updated weights for policy 0, policy_version 287642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:37,492][626795] Updated weights for policy 0, policy_version 287652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:38,975][24592] Fps is (10 sec: 42599.6, 60 sec: 41779.1, 300 sec: 41626.5). Total num frames: 2356510720. Throughput: 0: 10496.4. Samples: 339125514. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:38,977][24592] Avg episode reward: [(0, '5.021')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:39,357][626795] Updated weights for policy 0, policy_version 287662 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:41,320][626795] Updated weights for policy 0, policy_version 287672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:43,229][626795] Updated weights for policy 0, policy_version 287682 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:43,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41915.7, 300 sec: 41598.7). Total num frames: 2356715520. Throughput: 0: 10500.5. Samples: 339156618. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:43,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:45,152][626795] Updated weights for policy 0, policy_version 287692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:47,116][626795] Updated weights for policy 0, policy_version 287702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:48,976][24592] Fps is (10 sec: 41777.1, 60 sec: 41915.4, 300 sec: 41626.6). Total num frames: 2356928512. Throughput: 0: 10474.2. Samples: 339218856. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:48,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:49,285][626795] Updated weights for policy 0, policy_version 287712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:51,116][626795] Updated weights for policy 0, policy_version 287722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:53,197][626795] Updated weights for policy 0, policy_version 287732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:53,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.7, 300 sec: 41626.5). Total num frames: 2357133312. Throughput: 0: 10423.7. Samples: 339281028. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:53,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:55,205][626795] Updated weights for policy 0, policy_version 287742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:57,119][626795] Updated weights for policy 0, policy_version 287752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:58,975][24592] Fps is (10 sec: 40962.2, 60 sec: 41779.2, 300 sec: 41654.2). Total num frames: 2357338112. Throughput: 0: 10410.6. Samples: 339311826. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:12:58,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:12:59,116][626795] Updated weights for policy 0, policy_version 287762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:01,040][626795] Updated weights for policy 0, policy_version 287772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:02,993][626795] Updated weights for policy 0, policy_version 287782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:03,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41916.1, 300 sec: 41682.0). Total num frames: 2357551104. Throughput: 0: 10433.1. Samples: 339374994. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:03,978][24592] Avg episode reward: [(0, '4.361')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000287787_2357551104.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:04,074][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000286566_2347548672.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:04,999][626795] Updated weights for policy 0, policy_version 287792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:07,174][626795] Updated weights for policy 0, policy_version 287802 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41506.1, 300 sec: 41626.5). Total num frames: 2357747712. Throughput: 0: 10391.6. Samples: 339435330. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:08,976][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:09,049][626795] Updated weights for policy 0, policy_version 287812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:11,058][626795] Updated weights for policy 0, policy_version 287822 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:12,950][626795] Updated weights for policy 0, policy_version 287832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:13,976][24592] Fps is (10 sec: 40959.0, 60 sec: 41505.7, 300 sec: 41626.4). Total num frames: 2357960704. Throughput: 0: 10390.8. Samples: 339466446. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:13,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:15,001][626795] Updated weights for policy 0, policy_version 287842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:16,842][626795] Updated weights for policy 0, policy_version 287852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:18,830][626795] Updated weights for policy 0, policy_version 287862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:18,977][24592] Fps is (10 sec: 41771.1, 60 sec: 41641.7, 300 sec: 41598.5). Total num frames: 2358165504. Throughput: 0: 10398.1. Samples: 339529536. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:18,978][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:20,843][626795] Updated weights for policy 0, policy_version 287872 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:22,912][626795] Updated weights for policy 0, policy_version 287882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:23,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41642.6, 300 sec: 41598.7). Total num frames: 2358370304. Throughput: 0: 10355.8. Samples: 339591528. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:23,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:24,863][626795] Updated weights for policy 0, policy_version 287892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:26,732][626795] Updated weights for policy 0, policy_version 287902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:28,738][626795] Updated weights for policy 0, policy_version 287912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:28,979][24592] Fps is (10 sec: 41773.2, 60 sec: 41640.6, 300 sec: 41626.0). Total num frames: 2358583296. Throughput: 0: 10344.1. Samples: 339622134. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:28,980][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:30,842][626795] Updated weights for policy 0, policy_version 287922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:32,739][626795] Updated weights for policy 0, policy_version 287932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:33,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41506.1, 300 sec: 41626.5). Total num frames: 2358788096. Throughput: 0: 10355.2. Samples: 339684834. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:33,979][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:34,637][626795] Updated weights for policy 0, policy_version 287942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:36,671][626795] Updated weights for policy 0, policy_version 287952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:38,532][626795] Updated weights for policy 0, policy_version 287962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:38,975][24592] Fps is (10 sec: 40973.6, 60 sec: 41369.6, 300 sec: 41626.5). Total num frames: 2358992896. Throughput: 0: 10379.2. Samples: 339748092. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:38,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:40,462][626795] Updated weights for policy 0, policy_version 287972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:42,412][626795] Updated weights for policy 0, policy_version 287982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41642.8, 300 sec: 41654.3). Total num frames: 2359214080. Throughput: 0: 10385.5. Samples: 339779172. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:43,978][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:44,540][626795] Updated weights for policy 0, policy_version 287992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:46,388][626795] Updated weights for policy 0, policy_version 288002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:48,216][626795] Updated weights for policy 0, policy_version 288012 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:48,976][24592] Fps is (10 sec: 42595.7, 60 sec: 41506.1, 300 sec: 41626.4). Total num frames: 2359418880. Throughput: 0: 10382.0. Samples: 339842190. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:48,978][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:50,351][626795] Updated weights for policy 0, policy_version 288022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:52,323][626795] Updated weights for policy 0, policy_version 288032 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:53,975][24592] Fps is (10 sec: 40959.4, 60 sec: 41506.1, 300 sec: 41600.1). Total num frames: 2359623680. Throughput: 0: 10410.6. Samples: 339903810. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:53,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:54,419][626795] Updated weights for policy 0, policy_version 288042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:56,369][626795] Updated weights for policy 0, policy_version 288052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:13:58,382][626795] Updated weights for policy 0, policy_version 288062 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:58,981][24592] Fps is (10 sec: 40119.7, 60 sec: 41365.5, 300 sec: 41597.9). Total num frames: 2359820288. Throughput: 0: 10382.2. Samples: 339933702. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:13:58,983][24592] Avg episode reward: [(0, '5.033')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:00,349][626795] Updated weights for policy 0, policy_version 288072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:02,336][626795] Updated weights for policy 0, policy_version 288082 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:03,976][24592] Fps is (10 sec: 40959.7, 60 sec: 41369.6, 300 sec: 41626.7). Total num frames: 2360033280. Throughput: 0: 10368.3. Samples: 339996090. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:03,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:04,288][626795] Updated weights for policy 0, policy_version 288092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:06,245][626795] Updated weights for policy 0, policy_version 288102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:08,260][626795] Updated weights for policy 0, policy_version 288112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:08,975][24592] Fps is (10 sec: 41803.7, 60 sec: 41506.1, 300 sec: 41626.5). Total num frames: 2360238080. Throughput: 0: 10374.7. Samples: 340058388. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:08,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:10,161][626795] Updated weights for policy 0, policy_version 288122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:12,228][626795] Updated weights for policy 0, policy_version 288132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:13,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41506.5, 300 sec: 41626.5). Total num frames: 2360451072. Throughput: 0: 10399.4. Samples: 340090074. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:13,979][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:13,999][626795] Updated weights for policy 0, policy_version 288142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:15,988][626795] Updated weights for policy 0, policy_version 288152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:17,950][626795] Updated weights for policy 0, policy_version 288162 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:18,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41644.0, 300 sec: 41626.5). Total num frames: 2360664064. Throughput: 0: 10409.3. Samples: 340153254. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:18,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:19,854][626795] Updated weights for policy 0, policy_version 288172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:21,928][626795] Updated weights for policy 0, policy_version 288182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:23,951][626795] Updated weights for policy 0, policy_version 288192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:23,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41642.6, 300 sec: 41627.0). Total num frames: 2360868864. Throughput: 0: 10383.2. Samples: 340215336. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:23,976][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:25,939][626795] Updated weights for policy 0, policy_version 288202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:27,918][626795] Updated weights for policy 0, policy_version 288212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:28,975][24592] Fps is (10 sec: 41778.5, 60 sec: 41644.9, 300 sec: 41654.2). Total num frames: 2361081856. Throughput: 0: 10368.2. Samples: 340245744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:28,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:29,832][626795] Updated weights for policy 0, policy_version 288222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:31,853][626795] Updated weights for policy 0, policy_version 288232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:33,736][626795] Updated weights for policy 0, policy_version 288242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:33,979][24592] Fps is (10 sec: 40945.5, 60 sec: 41503.6, 300 sec: 41598.2). Total num frames: 2361278464. Throughput: 0: 10359.3. Samples: 340308390. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:33,980][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:35,749][626795] Updated weights for policy 0, policy_version 288252 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:37,751][626795] Updated weights for policy 0, policy_version 288262 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:38,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41642.7, 300 sec: 41626.5). Total num frames: 2361491456. Throughput: 0: 10397.2. Samples: 340371684. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:38,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:39,649][626795] Updated weights for policy 0, policy_version 288272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:41,535][626795] Updated weights for policy 0, policy_version 288282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:43,510][626795] Updated weights for policy 0, policy_version 288292 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:43,976][24592] Fps is (10 sec: 41794.3, 60 sec: 41369.5, 300 sec: 41598.7). Total num frames: 2361696256. Throughput: 0: 10432.7. Samples: 340403112. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:43,978][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:45,503][626795] Updated weights for policy 0, policy_version 288302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:47,506][626795] Updated weights for policy 0, policy_version 288312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41643.1, 300 sec: 41626.5). Total num frames: 2361917440. Throughput: 0: 10442.8. Samples: 340466016. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:48,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:49,467][626795] Updated weights for policy 0, policy_version 288322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:51,335][626795] Updated weights for policy 0, policy_version 288332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:53,282][626795] Updated weights for policy 0, policy_version 288342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:53,975][24592] Fps is (10 sec: 42599.1, 60 sec: 41642.8, 300 sec: 41626.5). Total num frames: 2362122240. Throughput: 0: 10446.2. Samples: 340528464. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:53,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:55,378][626795] Updated weights for policy 0, policy_version 288352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:57,472][626795] Updated weights for policy 0, policy_version 288362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:58,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41646.8, 300 sec: 41570.9). Total num frames: 2362318848. Throughput: 0: 10402.3. Samples: 340558176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:14:58,980][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:14:59,531][626795] Updated weights for policy 0, policy_version 288372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:01,478][626795] Updated weights for policy 0, policy_version 288382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:03,351][626795] Updated weights for policy 0, policy_version 288392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:03,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41642.8, 300 sec: 41598.7). Total num frames: 2362531840. Throughput: 0: 10357.3. Samples: 340619334. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:03,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000288395_2362531840.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:04,068][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000287176_2352545792.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:05,498][626795] Updated weights for policy 0, policy_version 288402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:07,461][626795] Updated weights for policy 0, policy_version 288412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41506.2, 300 sec: 41571.0). Total num frames: 2362728448. Throughput: 0: 10350.2. Samples: 340681092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:08,976][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:09,516][626795] Updated weights for policy 0, policy_version 288422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:11,480][626795] Updated weights for policy 0, policy_version 288432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:13,428][626795] Updated weights for policy 0, policy_version 288442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:13,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41505.7, 300 sec: 41598.6). Total num frames: 2362941440. Throughput: 0: 10357.9. Samples: 340711854. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:13,977][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:15,347][626795] Updated weights for policy 0, policy_version 288452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:17,407][626795] Updated weights for policy 0, policy_version 288462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 41570.9). Total num frames: 2363146240. Throughput: 0: 10352.2. Samples: 340774200. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:18,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:19,296][626795] Updated weights for policy 0, policy_version 288472 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:21,267][626795] Updated weights for policy 0, policy_version 288482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:23,188][626795] Updated weights for policy 0, policy_version 288492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:23,976][24592] Fps is (10 sec: 40961.1, 60 sec: 41369.5, 300 sec: 41543.1). Total num frames: 2363351040. Throughput: 0: 10329.8. Samples: 340836528. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:23,978][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:25,222][626795] Updated weights for policy 0, policy_version 288502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:27,190][626795] Updated weights for policy 0, policy_version 288512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41233.1, 300 sec: 41543.3). Total num frames: 2363555840. Throughput: 0: 10313.2. Samples: 340867206. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:28,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:29,329][626795] Updated weights for policy 0, policy_version 288522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:31,323][626795] Updated weights for policy 0, policy_version 288532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:33,293][626795] Updated weights for policy 0, policy_version 288542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:33,976][24592] Fps is (10 sec: 40960.1, 60 sec: 41372.0, 300 sec: 41543.1). Total num frames: 2363760640. Throughput: 0: 10272.7. Samples: 340928292. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:33,978][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:35,330][626795] Updated weights for policy 0, policy_version 288552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:37,211][626795] Updated weights for policy 0, policy_version 288562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:38,976][24592] Fps is (10 sec: 40959.7, 60 sec: 41233.0, 300 sec: 41515.5). Total num frames: 2363965440. Throughput: 0: 10260.9. Samples: 340990206. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:38,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:39,334][626795] Updated weights for policy 0, policy_version 288572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:41,189][626795] Updated weights for policy 0, policy_version 288582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:43,154][626795] Updated weights for policy 0, policy_version 288592 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:43,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41369.7, 300 sec: 41543.2). Total num frames: 2364178432. Throughput: 0: 10289.7. Samples: 341021214. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:43,977][24592] Avg episode reward: [(0, '4.911')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:45,164][626795] Updated weights for policy 0, policy_version 288602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:47,144][626795] Updated weights for policy 0, policy_version 288612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:48,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41096.5, 300 sec: 41543.3). Total num frames: 2364383232. Throughput: 0: 10318.7. Samples: 341083674. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:48,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:49,174][626795] Updated weights for policy 0, policy_version 288622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:51,118][626795] Updated weights for policy 0, policy_version 288632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:53,081][626795] Updated weights for policy 0, policy_version 288642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:53,976][24592] Fps is (10 sec: 41777.0, 60 sec: 41232.7, 300 sec: 41543.1). Total num frames: 2364596224. Throughput: 0: 10324.4. Samples: 341145696. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:53,977][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:55,076][626795] Updated weights for policy 0, policy_version 288652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:57,019][626795] Updated weights for policy 0, policy_version 288662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:15:58,936][626795] Updated weights for policy 0, policy_version 288672 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:58,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41369.5, 300 sec: 41543.1). Total num frames: 2364801024. Throughput: 0: 10334.8. Samples: 341176914. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:15:58,977][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:00,982][626795] Updated weights for policy 0, policy_version 288682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:02,983][626795] Updated weights for policy 0, policy_version 288692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:03,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41232.8, 300 sec: 41515.4). Total num frames: 2365005824. Throughput: 0: 10317.4. Samples: 341238486. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:03,978][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:05,005][626795] Updated weights for policy 0, policy_version 288702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:06,971][626795] Updated weights for policy 0, policy_version 288712 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:08,838][626795] Updated weights for policy 0, policy_version 288722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:08,976][24592] Fps is (10 sec: 40960.0, 60 sec: 41369.5, 300 sec: 41543.3). Total num frames: 2365210624. Throughput: 0: 10325.0. Samples: 341301150. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:08,976][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:10,931][626795] Updated weights for policy 0, policy_version 288732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:12,812][626795] Updated weights for policy 0, policy_version 288742 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:13,975][24592] Fps is (10 sec: 40961.5, 60 sec: 41233.4, 300 sec: 41543.2). Total num frames: 2365415424. Throughput: 0: 10332.5. Samples: 341332170. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:13,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:14,818][626795] Updated weights for policy 0, policy_version 288752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:16,717][626795] Updated weights for policy 0, policy_version 288762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:18,683][626795] Updated weights for policy 0, policy_version 288772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:18,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.6, 300 sec: 41570.9). Total num frames: 2365628416. Throughput: 0: 10366.2. Samples: 341394768. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:18,977][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:20,625][626795] Updated weights for policy 0, policy_version 288782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:22,630][626795] Updated weights for policy 0, policy_version 288792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:23,978][24592] Fps is (10 sec: 42587.2, 60 sec: 41504.5, 300 sec: 41570.9). Total num frames: 2365841408. Throughput: 0: 10386.6. Samples: 341457630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:23,980][24592] Avg episode reward: [(0, '4.943')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:24,675][626795] Updated weights for policy 0, policy_version 288802 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:26,567][626795] Updated weights for policy 0, policy_version 288812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:28,483][626795] Updated weights for policy 0, policy_version 288822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:28,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41506.1, 300 sec: 41543.1). Total num frames: 2366046208. Throughput: 0: 10392.2. Samples: 341488866. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:28,977][24592] Avg episode reward: [(0, '4.938')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:30,495][626795] Updated weights for policy 0, policy_version 288832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:32,518][626795] Updated weights for policy 0, policy_version 288842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:33,976][24592] Fps is (10 sec: 40968.6, 60 sec: 41505.9, 300 sec: 41515.3). Total num frames: 2366251008. Throughput: 0: 10366.3. Samples: 341550162. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:33,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:34,587][626795] Updated weights for policy 0, policy_version 288852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:36,540][626795] Updated weights for policy 0, policy_version 288862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:38,517][626795] Updated weights for policy 0, policy_version 288872 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:38,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41506.2, 300 sec: 41543.2). Total num frames: 2366455808. Throughput: 0: 10360.8. Samples: 341611926. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:38,976][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:40,538][626795] Updated weights for policy 0, policy_version 288882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:42,525][626795] Updated weights for policy 0, policy_version 288892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:43,976][24592] Fps is (10 sec: 40960.4, 60 sec: 41369.3, 300 sec: 41515.3). Total num frames: 2366660608. Throughput: 0: 10361.7. Samples: 341643192. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:43,980][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:44,427][626795] Updated weights for policy 0, policy_version 288902 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:46,370][626795] Updated weights for policy 0, policy_version 288912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:48,369][626795] Updated weights for policy 0, policy_version 288922 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:48,976][24592] Fps is (10 sec: 40959.6, 60 sec: 41369.5, 300 sec: 41515.4). Total num frames: 2366865408. Throughput: 0: 10379.7. Samples: 341705568. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:48,977][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:50,308][626795] Updated weights for policy 0, policy_version 288932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:52,297][626795] Updated weights for policy 0, policy_version 288942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:53,975][24592] Fps is (10 sec: 41781.3, 60 sec: 41370.0, 300 sec: 41515.4). Total num frames: 2367078400. Throughput: 0: 10387.0. Samples: 341768562. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:53,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:54,221][626795] Updated weights for policy 0, policy_version 288952 (0.0038)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:56,313][626795] Updated weights for policy 0, policy_version 288962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:16:58,178][626795] Updated weights for policy 0, policy_version 288972 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:58,976][24592] Fps is (10 sec: 42598.0, 60 sec: 41506.1, 300 sec: 41543.2). Total num frames: 2367291392. Throughput: 0: 10376.1. Samples: 341799096. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:16:58,978][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:00,164][626795] Updated weights for policy 0, policy_version 288982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:02,232][626795] Updated weights for policy 0, policy_version 288992 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:02,973][626772] Signal inference workers to stop experience collection... (4450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:02,976][626772] Signal inference workers to resume experience collection... (4450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:02,992][626795] InferenceWorker_p0-w0: stopping experience collection (4450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:02,993][626795] InferenceWorker_p0-w0: resuming experience collection (4450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:03,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41506.2, 300 sec: 41487.6). Total num frames: 2367496192. Throughput: 0: 10380.6. Samples: 341861898. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:03,977][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000289001_2367496192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:04,070][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000287787_2357551104.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:04,132][626795] Updated weights for policy 0, policy_version 289002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:06,169][626795] Updated weights for policy 0, policy_version 289012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:08,230][626795] Updated weights for policy 0, policy_version 289022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:08,975][24592] Fps is (10 sec: 40141.7, 60 sec: 41369.7, 300 sec: 41432.1). Total num frames: 2367692800. Throughput: 0: 10327.9. Samples: 341922360. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:08,977][24592] Avg episode reward: [(0, '4.480')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:10,226][626795] Updated weights for policy 0, policy_version 289032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:12,270][626795] Updated weights for policy 0, policy_version 289042 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:13,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41506.1, 300 sec: 41487.7). Total num frames: 2367905792. Throughput: 0: 10331.1. Samples: 341953764. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:13,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:14,049][626795] Updated weights for policy 0, policy_version 289052 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:16,097][626795] Updated weights for policy 0, policy_version 289062 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:18,005][626795] Updated weights for policy 0, policy_version 289072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2368110592. Throughput: 0: 10337.9. Samples: 342015360. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:18,977][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:20,128][626795] Updated weights for policy 0, policy_version 289082 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:22,003][626795] Updated weights for policy 0, policy_version 289092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:23,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41234.9, 300 sec: 41459.9). Total num frames: 2368315392. Throughput: 0: 10366.9. Samples: 342078438. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:23,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:23,997][626795] Updated weights for policy 0, policy_version 289102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:25,978][626795] Updated weights for policy 0, policy_version 289112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:27,929][626795] Updated weights for policy 0, policy_version 289122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:28,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41506.2, 300 sec: 41487.6). Total num frames: 2368536576. Throughput: 0: 10368.6. Samples: 342109776. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:28,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:29,765][626795] Updated weights for policy 0, policy_version 289132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:31,836][626795] Updated weights for policy 0, policy_version 289142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:33,774][626795] Updated weights for policy 0, policy_version 289152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:33,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41643.0, 300 sec: 41487.6). Total num frames: 2368749568. Throughput: 0: 10377.3. Samples: 342172548. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:33,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:35,712][626795] Updated weights for policy 0, policy_version 289162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:37,744][626795] Updated weights for policy 0, policy_version 289172 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:38,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41506.1, 300 sec: 41459.9). Total num frames: 2368946176. Throughput: 0: 10358.5. Samples: 342234696. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:38,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:39,827][626795] Updated weights for policy 0, policy_version 289182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:41,645][626795] Updated weights for policy 0, policy_version 289192 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:43,681][626795] Updated weights for policy 0, policy_version 289202 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:43,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41643.0, 300 sec: 41459.9). Total num frames: 2369159168. Throughput: 0: 10368.0. Samples: 342265656. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:43,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:45,713][626795] Updated weights for policy 0, policy_version 289212 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:47,670][626795] Updated weights for policy 0, policy_version 289222 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:48,976][24592] Fps is (10 sec: 41776.1, 60 sec: 41642.2, 300 sec: 41459.7). Total num frames: 2369363968. Throughput: 0: 10358.1. Samples: 342328020. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:48,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:49,560][626795] Updated weights for policy 0, policy_version 289232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:51,557][626795] Updated weights for policy 0, policy_version 289242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:53,580][626795] Updated weights for policy 0, policy_version 289252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:53,976][24592] Fps is (10 sec: 40958.1, 60 sec: 41505.8, 300 sec: 41459.8). Total num frames: 2369568768. Throughput: 0: 10406.2. Samples: 342390642. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:53,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:55,558][626795] Updated weights for policy 0, policy_version 289262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:57,437][626795] Updated weights for policy 0, policy_version 289272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:58,975][24592] Fps is (10 sec: 40963.5, 60 sec: 41369.8, 300 sec: 41432.1). Total num frames: 2369773568. Throughput: 0: 10389.8. Samples: 342421302. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:17:58,976][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:17:59,430][626795] Updated weights for policy 0, policy_version 289282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:01,412][626795] Updated weights for policy 0, policy_version 289292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:03,337][626795] Updated weights for policy 0, policy_version 289302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:03,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41506.3, 300 sec: 41487.6). Total num frames: 2369986560. Throughput: 0: 10420.6. Samples: 342484290. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:03,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:05,304][626795] Updated weights for policy 0, policy_version 289312 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:07,184][626795] Updated weights for policy 0, policy_version 289322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:08,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41642.6, 300 sec: 41459.9). Total num frames: 2370191360. Throughput: 0: 10404.5. Samples: 342546642. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:08,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:09,297][626795] Updated weights for policy 0, policy_version 289332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:11,309][626795] Updated weights for policy 0, policy_version 289342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:13,335][626795] Updated weights for policy 0, policy_version 289352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:13,976][24592] Fps is (10 sec: 40140.6, 60 sec: 41369.6, 300 sec: 41432.3). Total num frames: 2370387968. Throughput: 0: 10372.0. Samples: 342576516. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:13,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:15,381][626795] Updated weights for policy 0, policy_version 289362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:17,390][626795] Updated weights for policy 0, policy_version 289372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:18,975][24592] Fps is (10 sec: 40960.4, 60 sec: 41506.1, 300 sec: 41459.9). Total num frames: 2370600960. Throughput: 0: 10343.6. Samples: 342638010. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:18,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:19,251][626795] Updated weights for policy 0, policy_version 289382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:21,255][626795] Updated weights for policy 0, policy_version 289392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:23,270][626795] Updated weights for policy 0, policy_version 289402 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41432.5). Total num frames: 2370805760. Throughput: 0: 10359.3. Samples: 342700866. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:23,976][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:25,165][626795] Updated weights for policy 0, policy_version 289412 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:27,199][626795] Updated weights for policy 0, policy_version 289422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:28,968][626795] Updated weights for policy 0, policy_version 289432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:28,976][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2371026944. Throughput: 0: 10364.7. Samples: 342732066. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:28,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:31,028][626795] Updated weights for policy 0, policy_version 289442 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:32,965][626795] Updated weights for policy 0, policy_version 289452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:33,976][24592] Fps is (10 sec: 42597.9, 60 sec: 41369.5, 300 sec: 41487.6). Total num frames: 2371231744. Throughput: 0: 10383.9. Samples: 342795288. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:33,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:35,006][626795] Updated weights for policy 0, policy_version 289462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:36,896][626795] Updated weights for policy 0, policy_version 289472 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:38,905][626795] Updated weights for policy 0, policy_version 289482 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:38,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41642.7, 300 sec: 41459.8). Total num frames: 2371444736. Throughput: 0: 10389.1. Samples: 342858144. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:38,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:40,819][626795] Updated weights for policy 0, policy_version 289492 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:42,904][626795] Updated weights for policy 0, policy_version 289502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:43,981][24592] Fps is (10 sec: 40936.8, 60 sec: 41365.6, 300 sec: 41431.3). Total num frames: 2371641344. Throughput: 0: 10381.4. Samples: 342888528. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:43,982][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:44,877][626795] Updated weights for policy 0, policy_version 289512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:46,828][626795] Updated weights for policy 0, policy_version 289522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:48,772][626795] Updated weights for policy 0, policy_version 289532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:48,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41506.6, 300 sec: 41459.8). Total num frames: 2371854336. Throughput: 0: 10353.1. Samples: 342950178. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:48,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:50,789][626795] Updated weights for policy 0, policy_version 289542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:52,781][626795] Updated weights for policy 0, policy_version 289552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:53,976][24592] Fps is (10 sec: 41801.0, 60 sec: 41506.0, 300 sec: 41488.4). Total num frames: 2372059136. Throughput: 0: 10355.6. Samples: 343012650. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:53,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:54,777][626795] Updated weights for policy 0, policy_version 289562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:56,770][626795] Updated weights for policy 0, policy_version 289572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:18:58,549][626795] Updated weights for policy 0, policy_version 289582 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:58,978][24592] Fps is (10 sec: 40949.7, 60 sec: 41504.3, 300 sec: 41459.5). Total num frames: 2372263936. Throughput: 0: 10381.4. Samples: 343043706. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:18:58,979][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:00,665][626795] Updated weights for policy 0, policy_version 289592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:02,568][626795] Updated weights for policy 0, policy_version 289602 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:03,976][24592] Fps is (10 sec: 41781.4, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2372476928. Throughput: 0: 10406.1. Samples: 343106286. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:03,978][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000289609_2372476928.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:04,066][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000288395_2362531840.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:04,698][626795] Updated weights for policy 0, policy_version 289612 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:06,516][626795] Updated weights for policy 0, policy_version 289622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:08,522][626795] Updated weights for policy 0, policy_version 289632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:08,975][24592] Fps is (10 sec: 41790.2, 60 sec: 41506.2, 300 sec: 41459.8). Total num frames: 2372681728. Throughput: 0: 10394.7. Samples: 343168626. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:08,976][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:10,491][626795] Updated weights for policy 0, policy_version 289642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:12,458][626795] Updated weights for policy 0, policy_version 289652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41642.7, 300 sec: 41432.1). Total num frames: 2372886528. Throughput: 0: 10389.1. Samples: 343199574. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:13,976][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:14,501][626795] Updated weights for policy 0, policy_version 289662 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:16,325][626795] Updated weights for policy 0, policy_version 289672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:18,272][626795] Updated weights for policy 0, policy_version 289682 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:18,977][24592] Fps is (10 sec: 41773.3, 60 sec: 41641.7, 300 sec: 41459.7). Total num frames: 2373099520. Throughput: 0: 10387.3. Samples: 343262730. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:18,978][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:20,235][626795] Updated weights for policy 0, policy_version 289692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:22,202][626795] Updated weights for policy 0, policy_version 289702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:23,976][24592] Fps is (10 sec: 42596.7, 60 sec: 41779.0, 300 sec: 41459.8). Total num frames: 2373312512. Throughput: 0: 10400.4. Samples: 343326168. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:23,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:24,189][626795] Updated weights for policy 0, policy_version 289712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:26,162][626795] Updated weights for policy 0, policy_version 289722 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:28,431][626795] Updated weights for policy 0, policy_version 289732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:28,976][24592] Fps is (10 sec: 40145.3, 60 sec: 41232.9, 300 sec: 41432.6). Total num frames: 2373500928. Throughput: 0: 10348.9. Samples: 343354170. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:28,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:30,464][626795] Updated weights for policy 0, policy_version 289742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:32,464][626795] Updated weights for policy 0, policy_version 289752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:33,975][24592] Fps is (10 sec: 39323.4, 60 sec: 41233.2, 300 sec: 41404.3). Total num frames: 2373705728. Throughput: 0: 10338.2. Samples: 343415394. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:33,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:34,314][626795] Updated weights for policy 0, policy_version 289762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:36,287][626795] Updated weights for policy 0, policy_version 289772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:38,189][626795] Updated weights for policy 0, policy_version 289782 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:38,975][24592] Fps is (10 sec: 41780.5, 60 sec: 41233.1, 300 sec: 41432.1). Total num frames: 2373918720. Throughput: 0: 10354.0. Samples: 343478574. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:38,976][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:40,232][626795] Updated weights for policy 0, policy_version 289792 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:42,108][626795] Updated weights for policy 0, policy_version 289802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:43,975][24592] Fps is (10 sec: 43417.8, 60 sec: 41646.8, 300 sec: 41432.1). Total num frames: 2374139904. Throughput: 0: 10365.0. Samples: 343510104. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:43,977][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:43,979][626795] Updated weights for policy 0, policy_version 289812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:46,065][626795] Updated weights for policy 0, policy_version 289822 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:47,875][626795] Updated weights for policy 0, policy_version 289832 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:48,979][24592] Fps is (10 sec: 42586.0, 60 sec: 41504.2, 300 sec: 41431.7). Total num frames: 2374344704. Throughput: 0: 10379.2. Samples: 343573380. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:48,980][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:50,020][626795] Updated weights for policy 0, policy_version 289842 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:51,888][626795] Updated weights for policy 0, policy_version 289852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:53,761][626795] Updated weights for policy 0, policy_version 289862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:53,977][24592] Fps is (10 sec: 40953.2, 60 sec: 41505.5, 300 sec: 41459.6). Total num frames: 2374549504. Throughput: 0: 10393.4. Samples: 343636344. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:53,979][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:55,861][626795] Updated weights for policy 0, policy_version 289872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:57,725][626795] Updated weights for policy 0, policy_version 289882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:58,975][24592] Fps is (10 sec: 41791.2, 60 sec: 41644.5, 300 sec: 41459.9). Total num frames: 2374762496. Throughput: 0: 10400.7. Samples: 343667604. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:19:58,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:19:59,696][626795] Updated weights for policy 0, policy_version 289892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:01,815][626795] Updated weights for policy 0, policy_version 289902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:03,608][626795] Updated weights for policy 0, policy_version 289912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:03,975][24592] Fps is (10 sec: 41785.3, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2374967296. Throughput: 0: 10381.5. Samples: 343729884. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:03,978][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:05,589][626795] Updated weights for policy 0, policy_version 289922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:07,707][626795] Updated weights for policy 0, policy_version 289932 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:08,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41642.6, 300 sec: 41487.7). Total num frames: 2375180288. Throughput: 0: 10348.9. Samples: 343791864. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:08,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:09,548][626795] Updated weights for policy 0, policy_version 289942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:11,605][626795] Updated weights for policy 0, policy_version 289952 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:13,442][626795] Updated weights for policy 0, policy_version 289962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:13,976][24592] Fps is (10 sec: 40959.9, 60 sec: 41506.1, 300 sec: 41459.8). Total num frames: 2375376896. Throughput: 0: 10416.4. Samples: 343822908. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:13,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:15,581][626795] Updated weights for policy 0, policy_version 289972 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:17,451][626795] Updated weights for policy 0, policy_version 289982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:18,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41507.1, 300 sec: 41487.7). Total num frames: 2375589888. Throughput: 0: 10454.4. Samples: 343885842. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:18,976][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:19,584][626795] Updated weights for policy 0, policy_version 289992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:21,323][626795] Updated weights for policy 0, policy_version 290002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:23,387][626795] Updated weights for policy 0, policy_version 290012 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:23,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41506.5, 300 sec: 41515.4). Total num frames: 2375802880. Throughput: 0: 10442.1. Samples: 343948470. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:23,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:25,425][626795] Updated weights for policy 0, policy_version 290022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:27,305][626795] Updated weights for policy 0, policy_version 290032 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:28,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.4, 300 sec: 41515.4). Total num frames: 2376007680. Throughput: 0: 10429.8. Samples: 343979448. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:28,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:29,321][626795] Updated weights for policy 0, policy_version 290042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:31,265][626795] Updated weights for policy 0, policy_version 290052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:33,107][626795] Updated weights for policy 0, policy_version 290062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:33,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41779.1, 300 sec: 41515.4). Total num frames: 2376212480. Throughput: 0: 10424.5. Samples: 344042454. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:33,977][24592] Avg episode reward: [(0, '5.157')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:35,183][626795] Updated weights for policy 0, policy_version 290072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:37,223][626795] Updated weights for policy 0, policy_version 290082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:38,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41779.0, 300 sec: 41515.4). Total num frames: 2376425472. Throughput: 0: 10392.2. Samples: 344103978. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:38,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:39,302][626795] Updated weights for policy 0, policy_version 290092 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:41,215][626795] Updated weights for policy 0, policy_version 290102 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:43,138][626795] Updated weights for policy 0, policy_version 290112 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:43,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41505.9, 300 sec: 41515.4). Total num frames: 2376630272. Throughput: 0: 10375.1. Samples: 344134488. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:43,978][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:45,153][626795] Updated weights for policy 0, policy_version 290122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:47,090][626795] Updated weights for policy 0, policy_version 290132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:48,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41644.6, 300 sec: 41515.4). Total num frames: 2376843264. Throughput: 0: 10383.6. Samples: 344197146. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:48,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:48,981][626795] Updated weights for policy 0, policy_version 290142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:51,005][626795] Updated weights for policy 0, policy_version 290152 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:53,026][626795] Updated weights for policy 0, policy_version 290162 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:53,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41507.3, 300 sec: 41487.7). Total num frames: 2377039872. Throughput: 0: 10370.3. Samples: 344258526. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:53,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:55,104][626795] Updated weights for policy 0, policy_version 290172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:57,022][626795] Updated weights for policy 0, policy_version 290182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:20:58,962][626795] Updated weights for policy 0, policy_version 290192 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:58,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 2377252864. Throughput: 0: 10378.2. Samples: 344289924. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:20:58,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:01,083][626795] Updated weights for policy 0, policy_version 290202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:02,795][626795] Updated weights for policy 0, policy_version 290212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:03,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.2, 300 sec: 41515.4). Total num frames: 2377457664. Throughput: 0: 10361.2. Samples: 344352096. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:03,976][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000290217_2377457664.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000289001_2367496192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:04,957][626795] Updated weights for policy 0, policy_version 290222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:06,958][626795] Updated weights for policy 0, policy_version 290232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:08,975][24592] Fps is (10 sec: 40140.7, 60 sec: 41233.2, 300 sec: 41487.6). Total num frames: 2377654272. Throughput: 0: 10325.8. Samples: 344413134. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:08,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:09,026][626795] Updated weights for policy 0, policy_version 290242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:10,974][626795] Updated weights for policy 0, policy_version 290252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:12,920][626795] Updated weights for policy 0, policy_version 290262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:13,975][24592] Fps is (10 sec: 40959.6, 60 sec: 41506.2, 300 sec: 41487.6). Total num frames: 2377867264. Throughput: 0: 10324.0. Samples: 344444028. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:13,977][24592] Avg episode reward: [(0, '4.447')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:14,985][626795] Updated weights for policy 0, policy_version 290272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:16,917][626795] Updated weights for policy 0, policy_version 290282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:18,802][626795] Updated weights for policy 0, policy_version 290292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:18,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41506.1, 300 sec: 41488.0). Total num frames: 2378080256. Throughput: 0: 10309.9. Samples: 344506398. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:18,976][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:20,767][626795] Updated weights for policy 0, policy_version 290302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:22,649][626795] Updated weights for policy 0, policy_version 290312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:23,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 2378293248. Throughput: 0: 10358.9. Samples: 344570124. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:23,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:24,661][626795] Updated weights for policy 0, policy_version 290322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:26,614][626795] Updated weights for policy 0, policy_version 290332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:28,456][626795] Updated weights for policy 0, policy_version 290342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:28,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41506.2, 300 sec: 41515.5). Total num frames: 2378498048. Throughput: 0: 10378.2. Samples: 344601504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:28,976][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:30,555][626795] Updated weights for policy 0, policy_version 290352 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:32,429][626795] Updated weights for policy 0, policy_version 290362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:33,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41642.7, 300 sec: 41543.1). Total num frames: 2378711040. Throughput: 0: 10390.8. Samples: 344664732. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:33,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:34,459][626795] Updated weights for policy 0, policy_version 290372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:36,325][626795] Updated weights for policy 0, policy_version 290382 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:38,402][626795] Updated weights for policy 0, policy_version 290392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:38,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41506.0, 300 sec: 41543.2). Total num frames: 2378915840. Throughput: 0: 10402.5. Samples: 344726646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:38,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:40,398][626795] Updated weights for policy 0, policy_version 290402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:42,461][626795] Updated weights for policy 0, policy_version 290412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:43,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41506.3, 300 sec: 41543.2). Total num frames: 2379120640. Throughput: 0: 10378.8. Samples: 344756970. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:43,976][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:44,340][626795] Updated weights for policy 0, policy_version 290422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:46,330][626795] Updated weights for policy 0, policy_version 290432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:48,210][626795] Updated weights for policy 0, policy_version 290442 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:48,976][24592] Fps is (10 sec: 40960.9, 60 sec: 41369.5, 300 sec: 41515.3). Total num frames: 2379325440. Throughput: 0: 10393.8. Samples: 344819820. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:48,980][24592] Avg episode reward: [(0, '5.048')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:50,314][626795] Updated weights for policy 0, policy_version 290452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:52,194][626795] Updated weights for policy 0, policy_version 290462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.6, 300 sec: 41515.4). Total num frames: 2379538432. Throughput: 0: 10430.8. Samples: 344882520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:53,976][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:54,126][626795] Updated weights for policy 0, policy_version 290472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:56,098][626795] Updated weights for policy 0, policy_version 290482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:21:58,035][626795] Updated weights for policy 0, policy_version 290492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:58,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41506.2, 300 sec: 41515.4). Total num frames: 2379743232. Throughput: 0: 10415.0. Samples: 344912700. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:21:58,976][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:00,158][626795] Updated weights for policy 0, policy_version 290502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:01,970][626795] Updated weights for policy 0, policy_version 290512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:03,796][626795] Updated weights for policy 0, policy_version 290522 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41642.7, 300 sec: 41570.9). Total num frames: 2379956224. Throughput: 0: 10438.7. Samples: 344976138. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:03,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:05,806][626795] Updated weights for policy 0, policy_version 290532 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:07,315][626772] Signal inference workers to stop experience collection... (4500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:07,315][626772] Signal inference workers to resume experience collection... (4500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:07,324][626795] InferenceWorker_p0-w0: stopping experience collection (4500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:07,328][626795] InferenceWorker_p0-w0: resuming experience collection (4500 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:07,792][626795] Updated weights for policy 0, policy_version 290542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:08,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41915.6, 300 sec: 41570.9). Total num frames: 2380169216. Throughput: 0: 10433.5. Samples: 345039636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:08,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:09,805][626795] Updated weights for policy 0, policy_version 290552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:11,826][626795] Updated weights for policy 0, policy_version 290562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:13,804][626795] Updated weights for policy 0, policy_version 290572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:13,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41642.7, 300 sec: 41543.1). Total num frames: 2380365824. Throughput: 0: 10405.6. Samples: 345069756. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:13,976][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:15,714][626795] Updated weights for policy 0, policy_version 290582 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:17,676][626795] Updated weights for policy 0, policy_version 290592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:18,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41642.6, 300 sec: 41570.9). Total num frames: 2380578816. Throughput: 0: 10400.8. Samples: 345132768. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:18,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:19,720][626795] Updated weights for policy 0, policy_version 290602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:21,524][626795] Updated weights for policy 0, policy_version 290612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:23,493][626795] Updated weights for policy 0, policy_version 290622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:23,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41505.8, 300 sec: 41515.3). Total num frames: 2380783616. Throughput: 0: 10437.2. Samples: 345196320. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:23,977][24592] Avg episode reward: [(0, '4.837')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:25,506][626795] Updated weights for policy 0, policy_version 290632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:27,427][626795] Updated weights for policy 0, policy_version 290642 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.2, 300 sec: 41543.2). Total num frames: 2381004800. Throughput: 0: 10447.6. Samples: 345227112. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:28,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:29,417][626795] Updated weights for policy 0, policy_version 290652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:31,295][626795] Updated weights for policy 0, policy_version 290662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:33,255][626795] Updated weights for policy 0, policy_version 290672 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:33,975][24592] Fps is (10 sec: 43419.8, 60 sec: 41779.3, 300 sec: 41598.7). Total num frames: 2381217792. Throughput: 0: 10457.3. Samples: 345290394. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:33,977][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:35,215][626795] Updated weights for policy 0, policy_version 290682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:37,144][626795] Updated weights for policy 0, policy_version 290692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.5, 300 sec: 41570.9). Total num frames: 2381422592. Throughput: 0: 10488.5. Samples: 345354504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:38,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:39,085][626795] Updated weights for policy 0, policy_version 290702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:40,999][626795] Updated weights for policy 0, policy_version 290712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:43,002][626795] Updated weights for policy 0, policy_version 290722 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:43,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41915.7, 300 sec: 41598.8). Total num frames: 2381635584. Throughput: 0: 10517.0. Samples: 345385968. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:43,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:44,909][626795] Updated weights for policy 0, policy_version 290732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:46,934][626795] Updated weights for policy 0, policy_version 290742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:48,816][626795] Updated weights for policy 0, policy_version 290752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:48,976][24592] Fps is (10 sec: 41776.4, 60 sec: 41915.5, 300 sec: 41598.7). Total num frames: 2381840384. Throughput: 0: 10485.8. Samples: 345448008. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:48,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:50,845][626795] Updated weights for policy 0, policy_version 290762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:52,741][626795] Updated weights for policy 0, policy_version 290772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:53,976][24592] Fps is (10 sec: 41775.8, 60 sec: 41915.1, 300 sec: 41626.3). Total num frames: 2382053376. Throughput: 0: 10477.4. Samples: 345511128. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:53,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:54,777][626795] Updated weights for policy 0, policy_version 290782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:56,707][626795] Updated weights for policy 0, policy_version 290792 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:22:58,697][626795] Updated weights for policy 0, policy_version 290802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:58,975][24592] Fps is (10 sec: 42601.2, 60 sec: 42052.2, 300 sec: 41626.5). Total num frames: 2382266368. Throughput: 0: 10500.0. Samples: 345542256. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:22:58,976][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:00,550][626795] Updated weights for policy 0, policy_version 290812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:02,485][626795] Updated weights for policy 0, policy_version 290822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:03,979][24592] Fps is (10 sec: 42587.6, 60 sec: 42049.8, 300 sec: 41653.8). Total num frames: 2382479360. Throughput: 0: 10517.2. Samples: 345606078. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:03,980][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000290830_2382479360.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:04,113][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000289609_2372476928.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:04,468][626795] Updated weights for policy 0, policy_version 290832 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:06,464][626795] Updated weights for policy 0, policy_version 290842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:08,261][626795] Updated weights for policy 0, policy_version 290852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.9, 300 sec: 41682.0). Total num frames: 2382684160. Throughput: 0: 10514.5. Samples: 345669468. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:08,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:10,256][626795] Updated weights for policy 0, policy_version 290862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:12,137][626795] Updated weights for policy 0, policy_version 290872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:13,975][24592] Fps is (10 sec: 41793.5, 60 sec: 42188.8, 300 sec: 41682.0). Total num frames: 2382897152. Throughput: 0: 10534.3. Samples: 345701154. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:13,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:14,045][626795] Updated weights for policy 0, policy_version 290882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:16,119][626795] Updated weights for policy 0, policy_version 290892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:18,149][626795] Updated weights for policy 0, policy_version 290902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:18,976][24592] Fps is (10 sec: 41778.5, 60 sec: 42052.2, 300 sec: 41682.0). Total num frames: 2383101952. Throughput: 0: 10504.9. Samples: 345763116. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:18,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:19,990][626795] Updated weights for policy 0, policy_version 290912 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:22,118][626795] Updated weights for policy 0, policy_version 290922 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:23,976][24592] Fps is (10 sec: 40958.2, 60 sec: 42052.3, 300 sec: 41626.4). Total num frames: 2383306752. Throughput: 0: 10473.2. Samples: 345825804. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:23,980][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:23,999][626795] Updated weights for policy 0, policy_version 290932 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:26,078][626795] Updated weights for policy 0, policy_version 290942 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:27,851][626795] Updated weights for policy 0, policy_version 290952 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:28,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41915.7, 300 sec: 41654.3). Total num frames: 2383519744. Throughput: 0: 10474.7. Samples: 345857328. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:28,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:29,919][626795] Updated weights for policy 0, policy_version 290962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:31,764][626795] Updated weights for policy 0, policy_version 290972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:33,702][626795] Updated weights for policy 0, policy_version 290982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:33,981][24592] Fps is (10 sec: 42575.7, 60 sec: 41911.7, 300 sec: 41653.4). Total num frames: 2383732736. Throughput: 0: 10499.5. Samples: 345920538. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:33,992][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:35,643][626795] Updated weights for policy 0, policy_version 290992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:37,673][626795] Updated weights for policy 0, policy_version 291002 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:38,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41915.4, 300 sec: 41682.8). Total num frames: 2383937536. Throughput: 0: 10497.2. Samples: 345983496. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:38,979][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:39,667][626795] Updated weights for policy 0, policy_version 291012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:41,502][626795] Updated weights for policy 0, policy_version 291022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:43,415][626795] Updated weights for policy 0, policy_version 291032 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:43,976][24592] Fps is (10 sec: 41803.1, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2384150528. Throughput: 0: 10513.8. Samples: 346015380. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:43,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:45,428][626795] Updated weights for policy 0, policy_version 291042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:47,472][626795] Updated weights for policy 0, policy_version 291052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:48,975][24592] Fps is (10 sec: 42600.1, 60 sec: 42052.7, 300 sec: 41709.9). Total num frames: 2384363520. Throughput: 0: 10477.8. Samples: 346077546. Policy #0 lag: (min: 0.0, avg: 2.3, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:48,976][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:49,504][626795] Updated weights for policy 0, policy_version 291062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:51,356][626795] Updated weights for policy 0, policy_version 291072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:53,355][626795] Updated weights for policy 0, policy_version 291082 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:53,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41916.3, 300 sec: 41710.1). Total num frames: 2384568320. Throughput: 0: 10458.1. Samples: 346140084. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:53,976][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:55,219][626795] Updated weights for policy 0, policy_version 291092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:57,298][626795] Updated weights for policy 0, policy_version 291102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:58,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2384781312. Throughput: 0: 10440.7. Samples: 346170984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:23:58,978][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:23:59,274][626795] Updated weights for policy 0, policy_version 291112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:01,166][626795] Updated weights for policy 0, policy_version 291122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:03,022][626795] Updated weights for policy 0, policy_version 291132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:03,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41781.4, 300 sec: 41709.7). Total num frames: 2384986112. Throughput: 0: 10481.6. Samples: 346234788. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:03,978][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:04,997][626795] Updated weights for policy 0, policy_version 291142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:06,960][626795] Updated weights for policy 0, policy_version 291152 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:08,826][626795] Updated weights for policy 0, policy_version 291162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.8, 300 sec: 41737.6). Total num frames: 2385199104. Throughput: 0: 10498.8. Samples: 346298244. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:08,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:10,867][626795] Updated weights for policy 0, policy_version 291172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:12,784][626795] Updated weights for policy 0, policy_version 291182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:13,975][24592] Fps is (10 sec: 42599.8, 60 sec: 41915.8, 300 sec: 41737.8). Total num frames: 2385412096. Throughput: 0: 10494.7. Samples: 346329588. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:13,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:14,797][626795] Updated weights for policy 0, policy_version 291192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:16,642][626795] Updated weights for policy 0, policy_version 291202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:18,602][626795] Updated weights for policy 0, policy_version 291212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:18,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41915.8, 300 sec: 41709.8). Total num frames: 2385616896. Throughput: 0: 10504.8. Samples: 346393194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:18,976][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:20,594][626795] Updated weights for policy 0, policy_version 291222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:22,648][626795] Updated weights for policy 0, policy_version 291232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:23,981][24592] Fps is (10 sec: 41755.8, 60 sec: 42048.7, 300 sec: 41792.3). Total num frames: 2385829888. Throughput: 0: 10474.6. Samples: 346454904. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:23,982][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:24,584][626795] Updated weights for policy 0, policy_version 291242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:26,689][626795] Updated weights for policy 0, policy_version 291252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:28,427][626795] Updated weights for policy 0, policy_version 291262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:28,976][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.6, 300 sec: 41793.1). Total num frames: 2386034688. Throughput: 0: 10448.1. Samples: 346485546. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:28,978][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:30,488][626795] Updated weights for policy 0, policy_version 291272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:32,450][626795] Updated weights for policy 0, policy_version 291282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:33,976][24592] Fps is (10 sec: 41801.8, 60 sec: 41919.7, 300 sec: 41793.1). Total num frames: 2386247680. Throughput: 0: 10461.2. Samples: 346548300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:33,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:34,517][626795] Updated weights for policy 0, policy_version 291292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:36,355][626795] Updated weights for policy 0, policy_version 291302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:38,390][626795] Updated weights for policy 0, policy_version 291312 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:38,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41916.1, 300 sec: 41737.5). Total num frames: 2386452480. Throughput: 0: 10479.0. Samples: 346611636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:38,978][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:40,259][626795] Updated weights for policy 0, policy_version 291322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:42,245][626795] Updated weights for policy 0, policy_version 291332 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:43,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41915.8, 300 sec: 41765.7). Total num frames: 2386665472. Throughput: 0: 10486.4. Samples: 346642872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:43,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:44,190][626795] Updated weights for policy 0, policy_version 291342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:46,171][626795] Updated weights for policy 0, policy_version 291352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:48,000][626795] Updated weights for policy 0, policy_version 291362 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.2, 300 sec: 41765.5). Total num frames: 2386870272. Throughput: 0: 10464.1. Samples: 346705668. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:48,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:50,002][626795] Updated weights for policy 0, policy_version 291372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:52,054][626795] Updated weights for policy 0, policy_version 291382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:53,976][24592] Fps is (10 sec: 40958.8, 60 sec: 41779.1, 300 sec: 41737.5). Total num frames: 2387075072. Throughput: 0: 10436.7. Samples: 346767900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:53,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:54,018][626795] Updated weights for policy 0, policy_version 291392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:56,047][626795] Updated weights for policy 0, policy_version 291402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:24:58,087][626795] Updated weights for policy 0, policy_version 291412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:58,976][24592] Fps is (10 sec: 40957.9, 60 sec: 41642.3, 300 sec: 41737.5). Total num frames: 2387279872. Throughput: 0: 10408.7. Samples: 346797984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:24:58,977][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:00,035][626795] Updated weights for policy 0, policy_version 291422 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:02,079][626795] Updated weights for policy 0, policy_version 291432 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:03,858][626795] Updated weights for policy 0, policy_version 291442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:03,976][24592] Fps is (10 sec: 41779.6, 60 sec: 41779.2, 300 sec: 41737.5). Total num frames: 2387492864. Throughput: 0: 10374.4. Samples: 346860042. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:03,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000291442_2387492864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:04,067][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000290217_2377457664.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:05,937][626795] Updated weights for policy 0, policy_version 291452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:07,862][626795] Updated weights for policy 0, policy_version 291462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:08,977][24592] Fps is (10 sec: 41774.2, 60 sec: 41641.4, 300 sec: 41765.1). Total num frames: 2387697664. Throughput: 0: 10406.0. Samples: 346923132. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:08,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:09,859][626795] Updated weights for policy 0, policy_version 291472 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:11,808][626795] Updated weights for policy 0, policy_version 291482 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:13,756][626795] Updated weights for policy 0, policy_version 291492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:13,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41642.6, 300 sec: 41765.3). Total num frames: 2387910656. Throughput: 0: 10420.2. Samples: 346954452. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:13,977][24592] Avg episode reward: [(0, '4.896')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:15,690][626795] Updated weights for policy 0, policy_version 291502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:17,585][626795] Updated weights for policy 0, policy_version 291512 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:18,975][24592] Fps is (10 sec: 42606.0, 60 sec: 41779.3, 300 sec: 41765.3). Total num frames: 2388123648. Throughput: 0: 10427.2. Samples: 347017524. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:18,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:19,645][626795] Updated weights for policy 0, policy_version 291522 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:21,497][626795] Updated weights for policy 0, policy_version 291532 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:23,599][626795] Updated weights for policy 0, policy_version 291542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:23,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41646.4, 300 sec: 41765.3). Total num frames: 2388328448. Throughput: 0: 10413.8. Samples: 347080260. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:23,978][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:25,537][626795] Updated weights for policy 0, policy_version 291552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:27,548][626795] Updated weights for policy 0, policy_version 291562 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41642.8, 300 sec: 41765.3). Total num frames: 2388533248. Throughput: 0: 10386.7. Samples: 347110272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:28,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:29,576][626795] Updated weights for policy 0, policy_version 291572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:31,601][626795] Updated weights for policy 0, policy_version 291582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:33,478][626795] Updated weights for policy 0, policy_version 291592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41506.2, 300 sec: 41737.6). Total num frames: 2388738048. Throughput: 0: 10358.5. Samples: 347171802. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:33,984][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:35,486][626795] Updated weights for policy 0, policy_version 291602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:37,497][626795] Updated weights for policy 0, policy_version 291612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:38,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41642.7, 300 sec: 41765.4). Total num frames: 2388951040. Throughput: 0: 10385.6. Samples: 347235246. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:38,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:39,389][626795] Updated weights for policy 0, policy_version 291622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:41,285][626795] Updated weights for policy 0, policy_version 291632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:43,300][626795] Updated weights for policy 0, policy_version 291642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:43,976][24592] Fps is (10 sec: 42595.9, 60 sec: 41642.2, 300 sec: 41765.2). Total num frames: 2389164032. Throughput: 0: 10405.7. Samples: 347266242. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:43,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:45,348][626795] Updated weights for policy 0, policy_version 291652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:47,342][626795] Updated weights for policy 0, policy_version 291662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:48,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2389368832. Throughput: 0: 10420.3. Samples: 347328954. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:48,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:49,159][626795] Updated weights for policy 0, policy_version 291672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:51,091][626795] Updated weights for policy 0, policy_version 291682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:53,082][626795] Updated weights for policy 0, policy_version 291692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:53,975][24592] Fps is (10 sec: 40962.6, 60 sec: 41642.9, 300 sec: 41765.3). Total num frames: 2389573632. Throughput: 0: 10405.3. Samples: 347391354. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:53,977][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:55,258][626795] Updated weights for policy 0, policy_version 291702 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:57,183][626795] Updated weights for policy 0, policy_version 291712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:58,976][24592] Fps is (10 sec: 40956.4, 60 sec: 41642.4, 300 sec: 41765.2). Total num frames: 2389778432. Throughput: 0: 10375.8. Samples: 347421372. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:25:58,978][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:25:59,287][626795] Updated weights for policy 0, policy_version 291722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:01,163][626795] Updated weights for policy 0, policy_version 291732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:03,187][626795] Updated weights for policy 0, policy_version 291742 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41506.3, 300 sec: 41793.1). Total num frames: 2389983232. Throughput: 0: 10350.9. Samples: 347483316. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:03,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:05,085][626795] Updated weights for policy 0, policy_version 291752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:07,068][626795] Updated weights for policy 0, policy_version 291762 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:08,975][24592] Fps is (10 sec: 40964.0, 60 sec: 41507.4, 300 sec: 41765.3). Total num frames: 2390188032. Throughput: 0: 10353.1. Samples: 347546148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:08,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:09,044][626795] Updated weights for policy 0, policy_version 291772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:10,969][626795] Updated weights for policy 0, policy_version 291782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:12,937][626795] Updated weights for policy 0, policy_version 291792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41506.1, 300 sec: 41765.3). Total num frames: 2390401024. Throughput: 0: 10378.3. Samples: 347577294. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:13,977][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:14,939][626795] Updated weights for policy 0, policy_version 291802 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:16,843][626795] Updated weights for policy 0, policy_version 291812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:18,803][626795] Updated weights for policy 0, policy_version 291822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:18,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41506.2, 300 sec: 41765.3). Total num frames: 2390614016. Throughput: 0: 10409.1. Samples: 347640210. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:18,976][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:20,804][626795] Updated weights for policy 0, policy_version 291832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:22,574][626795] Updated weights for policy 0, policy_version 291842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:23,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41642.8, 300 sec: 41793.1). Total num frames: 2390827008. Throughput: 0: 10405.3. Samples: 347703486. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:23,976][24592] Avg episode reward: [(0, '4.382')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:24,699][626795] Updated weights for policy 0, policy_version 291852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:26,632][626795] Updated weights for policy 0, policy_version 291862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:28,559][626795] Updated weights for policy 0, policy_version 291872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:28,975][24592] Fps is (10 sec: 40959.4, 60 sec: 41506.1, 300 sec: 41737.6). Total num frames: 2391023616. Throughput: 0: 10409.6. Samples: 347734668. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:28,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:30,653][626795] Updated weights for policy 0, policy_version 291882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:32,636][626795] Updated weights for policy 0, policy_version 291892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:33,975][24592] Fps is (10 sec: 40959.3, 60 sec: 41642.6, 300 sec: 41765.4). Total num frames: 2391236608. Throughput: 0: 10372.3. Samples: 347795706. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:33,978][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:34,704][626795] Updated weights for policy 0, policy_version 291902 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:36,542][626795] Updated weights for policy 0, policy_version 291912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:38,617][626795] Updated weights for policy 0, policy_version 291922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:38,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.1, 300 sec: 41765.3). Total num frames: 2391441408. Throughput: 0: 10375.6. Samples: 347858256. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:38,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:40,449][626795] Updated weights for policy 0, policy_version 291932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:42,516][626795] Updated weights for policy 0, policy_version 291942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:43,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41370.1, 300 sec: 41765.4). Total num frames: 2391646208. Throughput: 0: 10400.0. Samples: 347889360. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:43,977][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:44,516][626795] Updated weights for policy 0, policy_version 291952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:46,421][626795] Updated weights for policy 0, policy_version 291962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:48,345][626795] Updated weights for policy 0, policy_version 291972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.2, 300 sec: 41765.3). Total num frames: 2391859200. Throughput: 0: 10413.7. Samples: 347951934. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:48,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:50,409][626795] Updated weights for policy 0, policy_version 291982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:52,241][626795] Updated weights for policy 0, policy_version 291992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:53,977][24592] Fps is (10 sec: 42592.9, 60 sec: 41641.8, 300 sec: 41792.9). Total num frames: 2392072192. Throughput: 0: 10417.0. Samples: 348014928. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:53,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:54,292][626795] Updated weights for policy 0, policy_version 292002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:56,159][626795] Updated weights for policy 0, policy_version 292012 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:26:58,068][626795] Updated weights for policy 0, policy_version 292022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:58,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41643.1, 300 sec: 41765.3). Total num frames: 2392276992. Throughput: 0: 10422.5. Samples: 348046308. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:26:58,977][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:00,243][626795] Updated weights for policy 0, policy_version 292032 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:02,166][626795] Updated weights for policy 0, policy_version 292042 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:03,975][24592] Fps is (10 sec: 40965.1, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2392481792. Throughput: 0: 10387.2. Samples: 348107634. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:03,978][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000292051_2392481792.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:04,075][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000290830_2382479360.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:04,191][626795] Updated weights for policy 0, policy_version 292052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:06,148][626795] Updated weights for policy 0, policy_version 292062 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:08,208][626795] Updated weights for policy 0, policy_version 292072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:08,975][24592] Fps is (10 sec: 40961.5, 60 sec: 41642.6, 300 sec: 41765.3). Total num frames: 2392686592. Throughput: 0: 10351.7. Samples: 348169314. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:08,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:10,207][626795] Updated weights for policy 0, policy_version 292082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:12,176][626795] Updated weights for policy 0, policy_version 292092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:13,895][626795] Updated weights for policy 0, policy_version 292102 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:13,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41642.5, 300 sec: 41765.3). Total num frames: 2392899584. Throughput: 0: 10355.3. Samples: 348200658. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:13,977][24592] Avg episode reward: [(0, '4.930')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:15,996][626795] Updated weights for policy 0, policy_version 292112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:17,878][626795] Updated weights for policy 0, policy_version 292122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:18,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41505.8, 300 sec: 41765.3). Total num frames: 2393104384. Throughput: 0: 10404.1. Samples: 348263892. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:18,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:19,948][626795] Updated weights for policy 0, policy_version 292132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:21,687][626795] Updated weights for policy 0, policy_version 292142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:23,705][626795] Updated weights for policy 0, policy_version 292152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:23,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41506.1, 300 sec: 41737.6). Total num frames: 2393317376. Throughput: 0: 10420.9. Samples: 348327198. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:23,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:25,808][626795] Updated weights for policy 0, policy_version 292162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:27,543][626795] Updated weights for policy 0, policy_version 292172 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:28,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2393522176. Throughput: 0: 10422.8. Samples: 348358386. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:28,976][24592] Avg episode reward: [(0, '4.371')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:29,636][626795] Updated weights for policy 0, policy_version 292182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:31,537][626795] Updated weights for policy 0, policy_version 292192 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:33,515][626795] Updated weights for policy 0, policy_version 292202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:33,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41642.5, 300 sec: 41737.5). Total num frames: 2393735168. Throughput: 0: 10432.1. Samples: 348421380. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:33,978][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:35,630][626795] Updated weights for policy 0, policy_version 292212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:37,563][626795] Updated weights for policy 0, policy_version 292222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:38,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41642.7, 300 sec: 41709.8). Total num frames: 2393939968. Throughput: 0: 10401.0. Samples: 348482958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:38,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:39,584][626795] Updated weights for policy 0, policy_version 292232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:41,415][626795] Updated weights for policy 0, policy_version 292242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:43,397][626795] Updated weights for policy 0, policy_version 292252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:43,976][24592] Fps is (10 sec: 40958.4, 60 sec: 41642.2, 300 sec: 41709.8). Total num frames: 2394144768. Throughput: 0: 10395.8. Samples: 348514122. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:43,978][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:45,422][626795] Updated weights for policy 0, policy_version 292262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:47,347][626795] Updated weights for policy 0, policy_version 292272 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:48,976][24592] Fps is (10 sec: 41777.6, 60 sec: 41642.4, 300 sec: 41709.8). Total num frames: 2394357760. Throughput: 0: 10427.1. Samples: 348576858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:48,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:49,381][626795] Updated weights for policy 0, policy_version 292282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:51,172][626795] Updated weights for policy 0, policy_version 292292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:53,161][626795] Updated weights for policy 0, policy_version 292302 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:53,975][24592] Fps is (10 sec: 41782.2, 60 sec: 41507.0, 300 sec: 41682.0). Total num frames: 2394562560. Throughput: 0: 10454.7. Samples: 348639774. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:53,976][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:55,223][626795] Updated weights for policy 0, policy_version 292312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:57,161][626795] Updated weights for policy 0, policy_version 292322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:27:58,972][626795] Updated weights for policy 0, policy_version 292332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:58,975][24592] Fps is (10 sec: 42600.3, 60 sec: 41779.5, 300 sec: 41710.3). Total num frames: 2394783744. Throughput: 0: 10457.7. Samples: 348671250. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:27:58,976][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:01,061][626795] Updated weights for policy 0, policy_version 292342 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:03,003][626795] Updated weights for policy 0, policy_version 292352 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:03,976][24592] Fps is (10 sec: 42597.8, 60 sec: 41779.1, 300 sec: 41709.8). Total num frames: 2394988544. Throughput: 0: 10450.2. Samples: 348734148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:03,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:05,007][626795] Updated weights for policy 0, policy_version 292362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:06,979][626795] Updated weights for policy 0, policy_version 292372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:08,976][24592] Fps is (10 sec: 40140.0, 60 sec: 41642.6, 300 sec: 41654.2). Total num frames: 2395185152. Throughput: 0: 10418.0. Samples: 348796008. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:08,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:08,983][626795] Updated weights for policy 0, policy_version 292382 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:10,952][626795] Updated weights for policy 0, policy_version 292392 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:12,968][626795] Updated weights for policy 0, policy_version 292402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:13,975][24592] Fps is (10 sec: 40141.5, 60 sec: 41506.4, 300 sec: 41654.3). Total num frames: 2395389952. Throughput: 0: 10398.7. Samples: 348826326. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:13,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:14,851][626795] Updated weights for policy 0, policy_version 292412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:16,834][626795] Updated weights for policy 0, policy_version 292422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:18,745][626795] Updated weights for policy 0, policy_version 292432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:18,981][24592] Fps is (10 sec: 42577.6, 60 sec: 41775.9, 300 sec: 41709.1). Total num frames: 2395611136. Throughput: 0: 10404.5. Samples: 348889632. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:18,982][24592] Avg episode reward: [(0, '5.036')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:20,653][626795] Updated weights for policy 0, policy_version 292442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:22,623][626795] Updated weights for policy 0, policy_version 292452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:23,977][24592] Fps is (10 sec: 43411.7, 60 sec: 41778.3, 300 sec: 41709.6). Total num frames: 2395824128. Throughput: 0: 10457.7. Samples: 348953568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:23,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:24,606][626795] Updated weights for policy 0, policy_version 292462 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:26,457][626795] Updated weights for policy 0, policy_version 292472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:28,445][626795] Updated weights for policy 0, policy_version 292482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:28,975][24592] Fps is (10 sec: 42620.1, 60 sec: 41915.8, 300 sec: 41710.6). Total num frames: 2396037120. Throughput: 0: 10465.5. Samples: 348985062. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:28,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:30,417][626795] Updated weights for policy 0, policy_version 292492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:32,236][626795] Updated weights for policy 0, policy_version 292502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:33,975][24592] Fps is (10 sec: 42604.1, 60 sec: 41916.0, 300 sec: 41737.6). Total num frames: 2396250112. Throughput: 0: 10481.7. Samples: 349048530. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:33,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:34,191][626795] Updated weights for policy 0, policy_version 292512 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:36,166][626795] Updated weights for policy 0, policy_version 292522 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:38,130][626795] Updated weights for policy 0, policy_version 292532 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:38,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2396454912. Throughput: 0: 10495.1. Samples: 349112052. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:38,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:40,193][626795] Updated weights for policy 0, policy_version 292542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:41,969][626795] Updated weights for policy 0, policy_version 292552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:43,976][24592] Fps is (10 sec: 40957.7, 60 sec: 41915.9, 300 sec: 41681.9). Total num frames: 2396659712. Throughput: 0: 10485.6. Samples: 349143108. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:43,981][24592] Avg episode reward: [(0, '4.902')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:44,005][626795] Updated weights for policy 0, policy_version 292562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:45,969][626795] Updated weights for policy 0, policy_version 292572 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:47,912][626795] Updated weights for policy 0, policy_version 292582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41916.0, 300 sec: 41709.8). Total num frames: 2396872704. Throughput: 0: 10485.1. Samples: 349205976. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:48,977][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:49,909][626795] Updated weights for policy 0, policy_version 292592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:51,781][626795] Updated weights for policy 0, policy_version 292602 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:53,729][626795] Updated weights for policy 0, policy_version 292612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:53,975][24592] Fps is (10 sec: 42600.4, 60 sec: 42052.2, 300 sec: 41709.8). Total num frames: 2397085696. Throughput: 0: 10518.2. Samples: 349269324. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:53,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:55,661][626795] Updated weights for policy 0, policy_version 292622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:57,628][626795] Updated weights for policy 0, policy_version 292632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41915.7, 300 sec: 41737.6). Total num frames: 2397298688. Throughput: 0: 10542.7. Samples: 349300746. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:28:58,976][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:28:59,697][626795] Updated weights for policy 0, policy_version 292642 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:01,548][626795] Updated weights for policy 0, policy_version 292652 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:03,551][626795] Updated weights for policy 0, policy_version 292662 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:03,976][24592] Fps is (10 sec: 41776.3, 60 sec: 41915.3, 300 sec: 41709.7). Total num frames: 2397503488. Throughput: 0: 10516.7. Samples: 349362840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:03,978][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000292664_2397503488.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000291442_2387492864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:05,550][626795] Updated weights for policy 0, policy_version 292672 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:07,468][626795] Updated weights for policy 0, policy_version 292682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:08,976][24592] Fps is (10 sec: 41777.9, 60 sec: 42188.7, 300 sec: 41709.7). Total num frames: 2397716480. Throughput: 0: 10497.4. Samples: 349425942. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:08,977][24592] Avg episode reward: [(0, '4.956')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:09,521][626795] Updated weights for policy 0, policy_version 292692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:11,328][626795] Updated weights for policy 0, policy_version 292702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:13,376][626795] Updated weights for policy 0, policy_version 292712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:13,975][24592] Fps is (10 sec: 40962.9, 60 sec: 42052.2, 300 sec: 41682.0). Total num frames: 2397913088. Throughput: 0: 10485.6. Samples: 349456914. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:13,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:15,416][626795] Updated weights for policy 0, policy_version 292722 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:17,323][626795] Updated weights for policy 0, policy_version 292732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:18,975][24592] Fps is (10 sec: 41780.0, 60 sec: 42055.8, 300 sec: 41710.6). Total num frames: 2398134272. Throughput: 0: 10465.7. Samples: 349519488. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:18,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:19,355][626795] Updated weights for policy 0, policy_version 292742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:21,145][626795] Updated weights for policy 0, policy_version 292752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:23,136][626795] Updated weights for policy 0, policy_version 292762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:23,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41916.6, 300 sec: 41709.8). Total num frames: 2398339072. Throughput: 0: 10455.7. Samples: 349582560. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:23,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:25,195][626795] Updated weights for policy 0, policy_version 292772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:26,919][626795] Updated weights for policy 0, policy_version 292782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:28,959][626795] Updated weights for policy 0, policy_version 292792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2398552064. Throughput: 0: 10474.2. Samples: 349614444. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:28,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:30,892][626795] Updated weights for policy 0, policy_version 292802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:32,756][626795] Updated weights for policy 0, policy_version 292812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:33,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41915.7, 300 sec: 41737.5). Total num frames: 2398765056. Throughput: 0: 10493.2. Samples: 349678170. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:33,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:34,748][626795] Updated weights for policy 0, policy_version 292822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:36,678][626795] Updated weights for policy 0, policy_version 292832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:38,544][626795] Updated weights for policy 0, policy_version 292842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:38,976][24592] Fps is (10 sec: 42596.4, 60 sec: 42051.9, 300 sec: 41737.5). Total num frames: 2398978048. Throughput: 0: 10515.0. Samples: 349742502. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:38,978][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:40,560][626795] Updated weights for policy 0, policy_version 292852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:42,575][626795] Updated weights for policy 0, policy_version 292862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:43,976][24592] Fps is (10 sec: 41778.3, 60 sec: 42052.5, 300 sec: 41737.5). Total num frames: 2399182848. Throughput: 0: 10496.7. Samples: 349773102. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:43,978][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:44,612][626795] Updated weights for policy 0, policy_version 292872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:46,477][626795] Updated weights for policy 0, policy_version 292882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,100][626772] Signal inference workers to stop experience collection... (4550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,101][626772] Signal inference workers to resume experience collection... (4550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,108][626795] InferenceWorker_p0-w0: stopping experience collection (4550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,112][626795] InferenceWorker_p0-w0: resuming experience collection (4550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:48,463][626795] Updated weights for policy 0, policy_version 292892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,975][24592] Fps is (10 sec: 40962.1, 60 sec: 41915.7, 300 sec: 41737.6). Total num frames: 2399387648. Throughput: 0: 10496.0. Samples: 349835154. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:48,977][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:50,403][626795] Updated weights for policy 0, policy_version 292902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:52,407][626795] Updated weights for policy 0, policy_version 292912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:53,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41915.8, 300 sec: 41765.4). Total num frames: 2399600640. Throughput: 0: 10474.6. Samples: 349897296. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:53,978][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:54,498][626795] Updated weights for policy 0, policy_version 292922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:56,312][626795] Updated weights for policy 0, policy_version 292932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:29:58,336][626795] Updated weights for policy 0, policy_version 292942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:58,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41778.9, 300 sec: 41737.5). Total num frames: 2399805440. Throughput: 0: 10475.5. Samples: 349928316. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:29:58,978][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:00,334][626795] Updated weights for policy 0, policy_version 292952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:02,337][626795] Updated weights for policy 0, policy_version 292962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41916.3, 300 sec: 41765.6). Total num frames: 2400018432. Throughput: 0: 10481.1. Samples: 349991136. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:03,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:04,170][626795] Updated weights for policy 0, policy_version 292972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:06,080][626795] Updated weights for policy 0, policy_version 292982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:08,039][626795] Updated weights for policy 0, policy_version 292992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:08,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41915.9, 300 sec: 41765.3). Total num frames: 2400231424. Throughput: 0: 10501.8. Samples: 350055138. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:08,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:10,001][626795] Updated weights for policy 0, policy_version 293002 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:11,901][626795] Updated weights for policy 0, policy_version 293012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:13,809][626795] Updated weights for policy 0, policy_version 293022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:13,976][24592] Fps is (10 sec: 41778.6, 60 sec: 42052.2, 300 sec: 41737.5). Total num frames: 2400436224. Throughput: 0: 10506.4. Samples: 350087232. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:13,977][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:15,865][626795] Updated weights for policy 0, policy_version 293032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:17,790][626795] Updated weights for policy 0, policy_version 293042 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:18,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41915.7, 300 sec: 41765.3). Total num frames: 2400649216. Throughput: 0: 10460.9. Samples: 350148912. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:18,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:19,986][626795] Updated weights for policy 0, policy_version 293052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:21,906][626795] Updated weights for policy 0, policy_version 293062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:23,751][626795] Updated weights for policy 0, policy_version 293072 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:23,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41779.3, 300 sec: 41737.5). Total num frames: 2400845824. Throughput: 0: 10395.3. Samples: 350210286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:23,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:25,854][626795] Updated weights for policy 0, policy_version 293082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:27,782][626795] Updated weights for policy 0, policy_version 293092 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:28,975][24592] Fps is (10 sec: 40141.4, 60 sec: 41642.7, 300 sec: 41737.6). Total num frames: 2401050624. Throughput: 0: 10408.2. Samples: 350241468. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:28,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:29,784][626795] Updated weights for policy 0, policy_version 293102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:31,635][626795] Updated weights for policy 0, policy_version 293112 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:33,619][626795] Updated weights for policy 0, policy_version 293122 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2401271808. Throughput: 0: 10423.3. Samples: 350304204. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:33,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:35,527][626795] Updated weights for policy 0, policy_version 293132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:37,476][626795] Updated weights for policy 0, policy_version 293142 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:38,976][24592] Fps is (10 sec: 43415.8, 60 sec: 41779.3, 300 sec: 41765.4). Total num frames: 2401484800. Throughput: 0: 10468.0. Samples: 350368362. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:38,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:39,434][626795] Updated weights for policy 0, policy_version 293152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:41,368][626795] Updated weights for policy 0, policy_version 293162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:43,271][626795] Updated weights for policy 0, policy_version 293172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.4, 300 sec: 41765.3). Total num frames: 2401689600. Throughput: 0: 10468.9. Samples: 350399412. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:43,976][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:45,287][626795] Updated weights for policy 0, policy_version 293182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:47,216][626795] Updated weights for policy 0, policy_version 293192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:48,976][24592] Fps is (10 sec: 41779.6, 60 sec: 41915.5, 300 sec: 41793.0). Total num frames: 2401902592. Throughput: 0: 10470.2. Samples: 350462298. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:48,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:49,327][626795] Updated weights for policy 0, policy_version 293202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:51,207][626795] Updated weights for policy 0, policy_version 293212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:53,169][626795] Updated weights for policy 0, policy_version 293222 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:53,977][24592] Fps is (10 sec: 40955.2, 60 sec: 41641.9, 300 sec: 41765.3). Total num frames: 2402099200. Throughput: 0: 10419.1. Samples: 350524008. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:53,978][24592] Avg episode reward: [(0, '4.978')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:55,236][626795] Updated weights for policy 0, policy_version 293232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:57,177][626795] Updated weights for policy 0, policy_version 293242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:58,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41779.1, 300 sec: 41793.0). Total num frames: 2402312192. Throughput: 0: 10385.0. Samples: 350554560. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:30:58,978][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:30:59,170][626795] Updated weights for policy 0, policy_version 293252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:01,102][626795] Updated weights for policy 0, policy_version 293262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:03,048][626795] Updated weights for policy 0, policy_version 293272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:03,975][24592] Fps is (10 sec: 41784.1, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2402516992. Throughput: 0: 10418.6. Samples: 350617746. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:03,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:04,009][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000293277_2402525184.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:04,102][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000292051_2392481792.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:05,080][626795] Updated weights for policy 0, policy_version 293282 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:07,012][626795] Updated weights for policy 0, policy_version 293292 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:08,905][626795] Updated weights for policy 0, policy_version 293302 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:08,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41642.6, 300 sec: 41793.1). Total num frames: 2402729984. Throughput: 0: 10456.3. Samples: 350680818. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:08,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:10,755][626795] Updated weights for policy 0, policy_version 293312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:12,685][626795] Updated weights for policy 0, policy_version 293322 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:13,976][24592] Fps is (10 sec: 42597.7, 60 sec: 41779.2, 300 sec: 41793.1). Total num frames: 2402942976. Throughput: 0: 10469.3. Samples: 350712588. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:13,977][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:14,718][626795] Updated weights for policy 0, policy_version 293332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:16,591][626795] Updated weights for policy 0, policy_version 293342 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:18,663][626795] Updated weights for policy 0, policy_version 293352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:18,976][24592] Fps is (10 sec: 42597.1, 60 sec: 41779.1, 300 sec: 41793.0). Total num frames: 2403155968. Throughput: 0: 10481.4. Samples: 350775870. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:18,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:20,590][626795] Updated weights for policy 0, policy_version 293362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:22,537][626795] Updated weights for policy 0, policy_version 293372 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:23,977][24592] Fps is (10 sec: 41779.7, 60 sec: 41915.7, 300 sec: 41820.9). Total num frames: 2403360768. Throughput: 0: 10431.7. Samples: 350837784. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:23,980][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:24,548][626795] Updated weights for policy 0, policy_version 293382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:26,519][626795] Updated weights for policy 0, policy_version 293392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:28,438][626795] Updated weights for policy 0, policy_version 293402 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:28,977][24592] Fps is (10 sec: 40953.1, 60 sec: 41914.4, 300 sec: 41792.8). Total num frames: 2403565568. Throughput: 0: 10424.3. Samples: 350868528. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:28,978][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:30,407][626795] Updated weights for policy 0, policy_version 293412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:32,367][626795] Updated weights for policy 0, policy_version 293422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:33,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2403778560. Throughput: 0: 10433.3. Samples: 350931792. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:33,976][24592] Avg episode reward: [(0, '5.089')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:34,354][626795] Updated weights for policy 0, policy_version 293432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:36,326][626795] Updated weights for policy 0, policy_version 293442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:38,175][626795] Updated weights for policy 0, policy_version 293452 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:38,976][24592] Fps is (10 sec: 41787.1, 60 sec: 41642.9, 300 sec: 41820.8). Total num frames: 2403983360. Throughput: 0: 10473.6. Samples: 350995308. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:38,979][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:40,131][626795] Updated weights for policy 0, policy_version 293462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:42,087][626795] Updated weights for policy 0, policy_version 293472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:43,937][626795] Updated weights for policy 0, policy_version 293482 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:43,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2404204544. Throughput: 0: 10496.6. Samples: 351026904. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:43,976][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:45,916][626795] Updated weights for policy 0, policy_version 293492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:47,846][626795] Updated weights for policy 0, policy_version 293502 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:48,977][24592] Fps is (10 sec: 42594.0, 60 sec: 41778.6, 300 sec: 41820.9). Total num frames: 2404409344. Throughput: 0: 10510.4. Samples: 351090726. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:48,980][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:49,947][626795] Updated weights for policy 0, policy_version 293512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:51,764][626795] Updated weights for policy 0, policy_version 293522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:53,795][626795] Updated weights for policy 0, policy_version 293532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:53,975][24592] Fps is (10 sec: 41779.1, 60 sec: 42053.1, 300 sec: 41848.7). Total num frames: 2404622336. Throughput: 0: 10483.5. Samples: 351152574. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:53,977][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:55,887][626795] Updated weights for policy 0, policy_version 293542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:57,755][626795] Updated weights for policy 0, policy_version 293552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:58,975][24592] Fps is (10 sec: 41783.9, 60 sec: 41916.0, 300 sec: 41848.6). Total num frames: 2404827136. Throughput: 0: 10465.5. Samples: 351183534. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:31:58,980][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:31:59,781][626795] Updated weights for policy 0, policy_version 293562 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:01,724][626795] Updated weights for policy 0, policy_version 293572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:03,553][626795] Updated weights for policy 0, policy_version 293582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:03,976][24592] Fps is (10 sec: 40956.6, 60 sec: 41915.1, 300 sec: 41848.5). Total num frames: 2405031936. Throughput: 0: 10444.9. Samples: 351245898. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:03,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:05,593][626795] Updated weights for policy 0, policy_version 293592 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:07,612][626795] Updated weights for policy 0, policy_version 293602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:08,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41915.7, 300 sec: 41848.7). Total num frames: 2405244928. Throughput: 0: 10483.1. Samples: 351309522. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:08,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:09,600][626795] Updated weights for policy 0, policy_version 293612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:11,364][626795] Updated weights for policy 0, policy_version 293622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:13,447][626795] Updated weights for policy 0, policy_version 293632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:13,976][24592] Fps is (10 sec: 41780.8, 60 sec: 41779.0, 300 sec: 41848.6). Total num frames: 2405449728. Throughput: 0: 10501.3. Samples: 351341070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:13,990][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:15,288][626795] Updated weights for policy 0, policy_version 293642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:17,234][626795] Updated weights for policy 0, policy_version 293652 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:18,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.4, 300 sec: 41848.6). Total num frames: 2405662720. Throughput: 0: 10488.0. Samples: 351403752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:18,978][24592] Avg episode reward: [(0, '4.979')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:19,271][626795] Updated weights for policy 0, policy_version 293662 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:21,226][626795] Updated weights for policy 0, policy_version 293672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:23,085][626795] Updated weights for policy 0, policy_version 293682 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:23,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41915.7, 300 sec: 41876.4). Total num frames: 2405875712. Throughput: 0: 10463.5. Samples: 351466164. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:23,977][24592] Avg episode reward: [(0, '4.331')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:25,175][626795] Updated weights for policy 0, policy_version 293692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:27,214][626795] Updated weights for policy 0, policy_version 293702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:28,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41917.0, 300 sec: 41848.7). Total num frames: 2406080512. Throughput: 0: 10443.8. Samples: 351496878. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:28,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:29,090][626795] Updated weights for policy 0, policy_version 293712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:31,087][626795] Updated weights for policy 0, policy_version 293722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:33,029][626795] Updated weights for policy 0, policy_version 293732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:33,976][24592] Fps is (10 sec: 40959.4, 60 sec: 41779.0, 300 sec: 41848.6). Total num frames: 2406285312. Throughput: 0: 10424.6. Samples: 351559824. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:33,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:34,988][626795] Updated weights for policy 0, policy_version 293742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:37,063][626795] Updated weights for policy 0, policy_version 293752 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:38,863][626795] Updated weights for policy 0, policy_version 293762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:38,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41915.8, 300 sec: 41876.5). Total num frames: 2406498304. Throughput: 0: 10436.7. Samples: 351622224. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:38,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:40,903][626795] Updated weights for policy 0, policy_version 293772 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:42,798][626795] Updated weights for policy 0, policy_version 293782 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:43,976][24592] Fps is (10 sec: 42598.3, 60 sec: 41779.0, 300 sec: 41876.4). Total num frames: 2406711296. Throughput: 0: 10459.1. Samples: 351654198. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:43,977][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:44,810][626795] Updated weights for policy 0, policy_version 293792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:46,628][626795] Updated weights for policy 0, policy_version 293802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:48,577][626795] Updated weights for policy 0, policy_version 293812 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41780.0, 300 sec: 41876.4). Total num frames: 2406916096. Throughput: 0: 10491.1. Samples: 351717990. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:48,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:50,509][626795] Updated weights for policy 0, policy_version 293822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:52,363][626795] Updated weights for policy 0, policy_version 293832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:53,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41779.0, 300 sec: 41848.6). Total num frames: 2407129088. Throughput: 0: 10482.7. Samples: 351781248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:53,980][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:54,484][626795] Updated weights for policy 0, policy_version 293842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:56,460][626795] Updated weights for policy 0, policy_version 293852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:32:58,447][626795] Updated weights for policy 0, policy_version 293862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41848.6). Total num frames: 2407333888. Throughput: 0: 10447.2. Samples: 351811188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:32:58,976][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:00,540][626795] Updated weights for policy 0, policy_version 293872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:02,559][626795] Updated weights for policy 0, policy_version 293882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:03,975][24592] Fps is (10 sec: 40961.3, 60 sec: 41779.8, 300 sec: 41876.4). Total num frames: 2407538688. Throughput: 0: 10426.7. Samples: 351872952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:03,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000293889_2407538688.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:04,097][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000292664_2397503488.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:04,465][626795] Updated weights for policy 0, policy_version 293892 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:06,410][626795] Updated weights for policy 0, policy_version 293902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:08,327][626795] Updated weights for policy 0, policy_version 293912 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.3, 300 sec: 41904.2). Total num frames: 2407751680. Throughput: 0: 10435.1. Samples: 351935742. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:08,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:10,323][626795] Updated weights for policy 0, policy_version 293922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:12,300][626795] Updated weights for policy 0, policy_version 293932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:13,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41916.1, 300 sec: 41877.1). Total num frames: 2407964672. Throughput: 0: 10442.8. Samples: 351966804. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:13,976][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:14,317][626795] Updated weights for policy 0, policy_version 293942 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:16,120][626795] Updated weights for policy 0, policy_version 293952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:18,039][626795] Updated weights for policy 0, policy_version 293962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:18,976][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41848.8). Total num frames: 2408169472. Throughput: 0: 10456.2. Samples: 352030350. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:18,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:20,010][626795] Updated weights for policy 0, policy_version 293972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:21,985][626795] Updated weights for policy 0, policy_version 293982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:23,845][626795] Updated weights for policy 0, policy_version 293992 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:23,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41848.6). Total num frames: 2408382464. Throughput: 0: 10492.4. Samples: 352094382. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:23,978][24592] Avg episode reward: [(0, '4.903')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:25,840][626795] Updated weights for policy 0, policy_version 294002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:27,722][626795] Updated weights for policy 0, policy_version 294012 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:28,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.2, 300 sec: 41820.8). Total num frames: 2408587264. Throughput: 0: 10475.2. Samples: 352125582. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:28,978][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:29,799][626795] Updated weights for policy 0, policy_version 294022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:31,785][626795] Updated weights for policy 0, policy_version 294032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:33,733][626795] Updated weights for policy 0, policy_version 294042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:33,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.8, 300 sec: 41848.6). Total num frames: 2408800256. Throughput: 0: 10425.6. Samples: 352187142. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:33,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:35,656][626795] Updated weights for policy 0, policy_version 294052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:37,648][626795] Updated weights for policy 0, policy_version 294062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:38,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41915.7, 300 sec: 41876.5). Total num frames: 2409013248. Throughput: 0: 10436.7. Samples: 352250898. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:38,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:39,637][626795] Updated weights for policy 0, policy_version 294072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:41,479][626795] Updated weights for policy 0, policy_version 294082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:43,386][626795] Updated weights for policy 0, policy_version 294092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:43,977][24592] Fps is (10 sec: 41774.8, 60 sec: 41778.6, 300 sec: 41848.5). Total num frames: 2409218048. Throughput: 0: 10468.7. Samples: 352282290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:43,981][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:45,377][626795] Updated weights for policy 0, policy_version 294102 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:47,301][626795] Updated weights for policy 0, policy_version 294112 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2409431040. Throughput: 0: 10502.3. Samples: 352345554. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:48,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:49,253][626795] Updated weights for policy 0, policy_version 294122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:51,129][626795] Updated weights for policy 0, policy_version 294132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:53,082][626795] Updated weights for policy 0, policy_version 294142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:53,976][24592] Fps is (10 sec: 42602.9, 60 sec: 41915.9, 300 sec: 41848.6). Total num frames: 2409644032. Throughput: 0: 10530.8. Samples: 352409628. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:53,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:55,107][626795] Updated weights for policy 0, policy_version 294152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:56,972][626795] Updated weights for policy 0, policy_version 294162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:33:58,968][626795] Updated weights for policy 0, policy_version 294172 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:58,976][24592] Fps is (10 sec: 42597.4, 60 sec: 42052.1, 300 sec: 41876.5). Total num frames: 2409857024. Throughput: 0: 10529.1. Samples: 352440618. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:33:58,979][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:00,960][626795] Updated weights for policy 0, policy_version 294182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:02,948][626795] Updated weights for policy 0, policy_version 294192 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.2, 300 sec: 41848.6). Total num frames: 2410061824. Throughput: 0: 10507.0. Samples: 352503168. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:03,977][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:04,959][626795] Updated weights for policy 0, policy_version 294202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:06,830][626795] Updated weights for policy 0, policy_version 294212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:08,814][626795] Updated weights for policy 0, policy_version 294222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:08,975][24592] Fps is (10 sec: 41780.4, 60 sec: 42052.3, 300 sec: 41904.2). Total num frames: 2410274816. Throughput: 0: 10482.2. Samples: 352566078. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:08,977][24592] Avg episode reward: [(0, '4.348')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:10,717][626795] Updated weights for policy 0, policy_version 294232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:12,735][626795] Updated weights for policy 0, policy_version 294242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41915.7, 300 sec: 41848.6). Total num frames: 2410479616. Throughput: 0: 10483.6. Samples: 352597344. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:13,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:14,646][626795] Updated weights for policy 0, policy_version 294252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:16,584][626795] Updated weights for policy 0, policy_version 294262 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:18,451][626795] Updated weights for policy 0, policy_version 294272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:18,977][24592] Fps is (10 sec: 41771.5, 60 sec: 42051.0, 300 sec: 41876.2). Total num frames: 2410692608. Throughput: 0: 10528.7. Samples: 352660950. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:18,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:20,401][626795] Updated weights for policy 0, policy_version 294282 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:22,337][626795] Updated weights for policy 0, policy_version 294292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:23,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42052.3, 300 sec: 41876.4). Total num frames: 2410905600. Throughput: 0: 10538.3. Samples: 352725120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:23,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:24,365][626795] Updated weights for policy 0, policy_version 294302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:26,215][626795] Updated weights for policy 0, policy_version 294312 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:28,015][626795] Updated weights for policy 0, policy_version 294322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:28,976][24592] Fps is (10 sec: 42605.4, 60 sec: 42188.8, 300 sec: 41876.4). Total num frames: 2411118592. Throughput: 0: 10538.2. Samples: 352756500. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:28,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:30,025][626795] Updated weights for policy 0, policy_version 294332 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:32,103][626795] Updated weights for policy 0, policy_version 294342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:33,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42052.3, 300 sec: 41848.7). Total num frames: 2411323392. Throughput: 0: 10515.2. Samples: 352818738. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:33,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:34,025][626795] Updated weights for policy 0, policy_version 294352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:36,092][626795] Updated weights for policy 0, policy_version 294362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:38,006][626795] Updated weights for policy 0, policy_version 294372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:38,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41915.7, 300 sec: 41848.7). Total num frames: 2411528192. Throughput: 0: 10483.9. Samples: 352881402. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:38,976][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:39,973][626795] Updated weights for policy 0, policy_version 294382 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:42,003][626795] Updated weights for policy 0, policy_version 294392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:43,884][626795] Updated weights for policy 0, policy_version 294402 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:43,977][24592] Fps is (10 sec: 41773.6, 60 sec: 42052.1, 300 sec: 41876.2). Total num frames: 2411741184. Throughput: 0: 10480.6. Samples: 352912260. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:43,978][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:45,857][626795] Updated weights for policy 0, policy_version 294412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:47,765][626795] Updated weights for policy 0, policy_version 294422 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41915.8, 300 sec: 41848.6). Total num frames: 2411945984. Throughput: 0: 10497.2. Samples: 352975542. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:48,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:49,806][626795] Updated weights for policy 0, policy_version 294432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:51,655][626795] Updated weights for policy 0, policy_version 294442 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:53,491][626795] Updated weights for policy 0, policy_version 294452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:53,975][24592] Fps is (10 sec: 41785.5, 60 sec: 41915.8, 300 sec: 41876.4). Total num frames: 2412158976. Throughput: 0: 10521.5. Samples: 353039544. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:53,976][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:55,599][626795] Updated weights for policy 0, policy_version 294462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:57,482][626795] Updated weights for policy 0, policy_version 294472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:58,976][24592] Fps is (10 sec: 42596.2, 60 sec: 41915.6, 300 sec: 41876.3). Total num frames: 2412371968. Throughput: 0: 10518.7. Samples: 353070690. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:34:58,978][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:34:59,353][626795] Updated weights for policy 0, policy_version 294482 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:01,318][626795] Updated weights for policy 0, policy_version 294492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:03,280][626795] Updated weights for policy 0, policy_version 294502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:03,980][24592] Fps is (10 sec: 42576.5, 60 sec: 42048.7, 300 sec: 41875.7). Total num frames: 2412584960. Throughput: 0: 10513.1. Samples: 353134074. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:03,981][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000294505_2412584960.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:04,088][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000293277_2402525184.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:05,343][626795] Updated weights for policy 0, policy_version 294512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:07,305][626795] Updated weights for policy 0, policy_version 294522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:08,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41915.6, 300 sec: 41876.4). Total num frames: 2412789760. Throughput: 0: 10457.4. Samples: 353195706. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:08,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:09,317][626795] Updated weights for policy 0, policy_version 294532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:11,260][626795] Updated weights for policy 0, policy_version 294542 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:13,184][626795] Updated weights for policy 0, policy_version 294552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:13,975][24592] Fps is (10 sec: 41800.7, 60 sec: 42052.3, 300 sec: 41876.4). Total num frames: 2413002752. Throughput: 0: 10455.6. Samples: 353227002. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:13,976][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:15,212][626795] Updated weights for policy 0, policy_version 294562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:17,086][626795] Updated weights for policy 0, policy_version 294572 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:18,976][24592] Fps is (10 sec: 41777.7, 60 sec: 41916.7, 300 sec: 41904.1). Total num frames: 2413207552. Throughput: 0: 10474.5. Samples: 353290092. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:18,980][24592] Avg episode reward: [(0, '4.927')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:18,993][626795] Updated weights for policy 0, policy_version 294582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:20,969][626795] Updated weights for policy 0, policy_version 294592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:22,880][626795] Updated weights for policy 0, policy_version 294602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:23,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41915.6, 300 sec: 41931.9). Total num frames: 2413420544. Throughput: 0: 10494.2. Samples: 353353644. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:23,979][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:24,873][626795] Updated weights for policy 0, policy_version 294612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:26,810][626795] Updated weights for policy 0, policy_version 294622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:28,685][626795] Updated weights for policy 0, policy_version 294632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:28,975][24592] Fps is (10 sec: 42600.6, 60 sec: 41915.9, 300 sec: 41904.2). Total num frames: 2413633536. Throughput: 0: 10514.9. Samples: 353385414. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:28,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:30,670][626795] Updated weights for policy 0, policy_version 294642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:32,538][626795] Updated weights for policy 0, policy_version 294652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:33,975][24592] Fps is (10 sec: 42599.3, 60 sec: 42052.4, 300 sec: 41904.2). Total num frames: 2413846528. Throughput: 0: 10517.3. Samples: 353448822. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:33,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:34,538][626795] Updated weights for policy 0, policy_version 294662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:36,528][626795] Updated weights for policy 0, policy_version 294672 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:38,441][626795] Updated weights for policy 0, policy_version 294682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:38,976][24592] Fps is (10 sec: 41778.2, 60 sec: 42052.2, 300 sec: 41904.1). Total num frames: 2414051328. Throughput: 0: 10484.0. Samples: 353511324. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:38,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:40,539][626795] Updated weights for policy 0, policy_version 294692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:42,465][626795] Updated weights for policy 0, policy_version 294702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:43,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42053.3, 300 sec: 41904.2). Total num frames: 2414264320. Throughput: 0: 10480.3. Samples: 353542296. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:43,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:44,551][626795] Updated weights for policy 0, policy_version 294712 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:46,325][626795] Updated weights for policy 0, policy_version 294722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:48,320][626795] Updated weights for policy 0, policy_version 294732 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:48,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42052.0, 300 sec: 41932.0). Total num frames: 2414469120. Throughput: 0: 10462.4. Samples: 353604834. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:48,978][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:50,436][626795] Updated weights for policy 0, policy_version 294742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:52,229][626795] Updated weights for policy 0, policy_version 294752 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:53,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2414673920. Throughput: 0: 10478.0. Samples: 353667216. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:53,977][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:54,230][626795] Updated weights for policy 0, policy_version 294762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:56,353][626795] Updated weights for policy 0, policy_version 294772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:35:58,118][626795] Updated weights for policy 0, policy_version 294782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:58,975][24592] Fps is (10 sec: 41781.0, 60 sec: 41916.1, 300 sec: 41931.9). Total num frames: 2414886912. Throughput: 0: 10480.1. Samples: 353698608. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:35:58,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:00,106][626795] Updated weights for policy 0, policy_version 294792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:02,013][626795] Updated weights for policy 0, policy_version 294802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41782.8, 300 sec: 41904.2). Total num frames: 2415091712. Throughput: 0: 10481.7. Samples: 353761764. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:03,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:04,013][626795] Updated weights for policy 0, policy_version 294812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:05,843][626795] Updated weights for policy 0, policy_version 294822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:07,916][626795] Updated weights for policy 0, policy_version 294832 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:08,975][24592] Fps is (10 sec: 41778.5, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2415304704. Throughput: 0: 10471.6. Samples: 353824866. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:08,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:09,929][626795] Updated weights for policy 0, policy_version 294842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:11,750][626795] Updated weights for policy 0, policy_version 294852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:13,764][626795] Updated weights for policy 0, policy_version 294862 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:13,975][24592] Fps is (10 sec: 42597.9, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2415517696. Throughput: 0: 10453.0. Samples: 353855802. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:13,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:15,845][626795] Updated weights for policy 0, policy_version 294872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:17,705][626795] Updated weights for policy 0, policy_version 294882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41916.0, 300 sec: 41904.2). Total num frames: 2415722496. Throughput: 0: 10442.0. Samples: 353918712. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:18,978][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:19,663][626795] Updated weights for policy 0, policy_version 294892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:21,668][626795] Updated weights for policy 0, policy_version 294902 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:23,599][626795] Updated weights for policy 0, policy_version 294912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:23,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41915.9, 300 sec: 41932.2). Total num frames: 2415935488. Throughput: 0: 10454.6. Samples: 353981778. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:23,976][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:25,556][626795] Updated weights for policy 0, policy_version 294922 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:27,524][626795] Updated weights for policy 0, policy_version 294932 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:28,976][24592] Fps is (10 sec: 42595.5, 60 sec: 41915.1, 300 sec: 41931.8). Total num frames: 2416148480. Throughput: 0: 10461.1. Samples: 354013056. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:28,977][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:29,447][626795] Updated weights for policy 0, policy_version 294942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:31,335][626795] Updated weights for policy 0, policy_version 294952 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:33,253][626795] Updated weights for policy 0, policy_version 294962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.2, 300 sec: 41931.9). Total num frames: 2416353280. Throughput: 0: 10475.0. Samples: 354076206. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:33,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:35,274][626795] Updated weights for policy 0, policy_version 294972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:37,109][626795] Updated weights for policy 0, policy_version 294982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:38,976][24592] Fps is (10 sec: 41781.4, 60 sec: 41915.7, 300 sec: 41904.1). Total num frames: 2416566272. Throughput: 0: 10504.3. Samples: 354139914. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:38,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:39,011][626795] Updated weights for policy 0, policy_version 294992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:41,056][626795] Updated weights for policy 0, policy_version 295002 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:43,125][626795] Updated weights for policy 0, policy_version 295012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:43,976][24592] Fps is (10 sec: 41777.9, 60 sec: 41779.0, 300 sec: 41904.3). Total num frames: 2416771072. Throughput: 0: 10494.7. Samples: 354170874. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:43,978][24592] Avg episode reward: [(0, '5.026')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:45,109][626795] Updated weights for policy 0, policy_version 295022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:47,106][626795] Updated weights for policy 0, policy_version 295032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:48,887][626795] Updated weights for policy 0, policy_version 295042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:48,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41915.9, 300 sec: 41904.1). Total num frames: 2416984064. Throughput: 0: 10463.8. Samples: 354232638. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:48,976][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:50,863][626795] Updated weights for policy 0, policy_version 295052 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:52,889][626795] Updated weights for policy 0, policy_version 295062 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:53,980][24592] Fps is (10 sec: 41760.0, 60 sec: 41912.3, 300 sec: 41903.5). Total num frames: 2417188864. Throughput: 0: 10461.5. Samples: 354295686. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:53,981][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:54,930][626795] Updated weights for policy 0, policy_version 295072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:56,915][626795] Updated weights for policy 0, policy_version 295082 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:36:58,701][626795] Updated weights for policy 0, policy_version 295092 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:58,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41915.7, 300 sec: 41932.1). Total num frames: 2417401856. Throughput: 0: 10466.0. Samples: 354326772. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:36:58,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:00,696][626795] Updated weights for policy 0, policy_version 295102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:02,658][626795] Updated weights for policy 0, policy_version 295112 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:03,975][24592] Fps is (10 sec: 41799.8, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2417606656. Throughput: 0: 10481.5. Samples: 354390378. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:03,978][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:04,002][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000295119_2417614848.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:04,076][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000293889_2407538688.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:04,693][626795] Updated weights for policy 0, policy_version 295122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:06,572][626795] Updated weights for policy 0, policy_version 295132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:08,559][626795] Updated weights for policy 0, policy_version 295142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41915.8, 300 sec: 41932.0). Total num frames: 2417819648. Throughput: 0: 10471.2. Samples: 354452982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:08,976][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:10,509][626795] Updated weights for policy 0, policy_version 295152 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:12,398][626795] Updated weights for policy 0, policy_version 295162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:13,976][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.2, 300 sec: 41904.1). Total num frames: 2418024448. Throughput: 0: 10469.5. Samples: 354484176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:13,979][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:14,489][626795] Updated weights for policy 0, policy_version 295172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:16,526][626795] Updated weights for policy 0, policy_version 295182 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:18,365][626795] Updated weights for policy 0, policy_version 295192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:18,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41779.2, 300 sec: 41876.4). Total num frames: 2418229248. Throughput: 0: 10441.6. Samples: 354546078. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:18,977][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:19,746][626772] Signal inference workers to stop experience collection... (4600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:19,748][626772] Signal inference workers to resume experience collection... (4600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:19,766][626795] InferenceWorker_p0-w0: stopping experience collection (4600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:19,771][626795] InferenceWorker_p0-w0: resuming experience collection (4600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:20,427][626795] Updated weights for policy 0, policy_version 295202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:22,398][626795] Updated weights for policy 0, policy_version 295212 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:23,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2418442240. Throughput: 0: 10419.1. Samples: 354608772. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:23,978][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:24,346][626795] Updated weights for policy 0, policy_version 295222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:26,243][626795] Updated weights for policy 0, policy_version 295232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:28,227][626795] Updated weights for policy 0, policy_version 295242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:28,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41643.2, 300 sec: 41904.2). Total num frames: 2418647040. Throughput: 0: 10415.5. Samples: 354639570. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:28,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:30,254][626795] Updated weights for policy 0, policy_version 295252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:32,154][626795] Updated weights for policy 0, policy_version 295262 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41642.7, 300 sec: 41876.4). Total num frames: 2418851840. Throughput: 0: 10419.9. Samples: 354701532. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:33,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:34,241][626795] Updated weights for policy 0, policy_version 295272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:36,163][626795] Updated weights for policy 0, policy_version 295282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:38,047][626795] Updated weights for policy 0, policy_version 295292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:38,977][24592] Fps is (10 sec: 41778.6, 60 sec: 41642.8, 300 sec: 41876.4). Total num frames: 2419064832. Throughput: 0: 10439.0. Samples: 354765390. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:38,979][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:39,997][626795] Updated weights for policy 0, policy_version 295302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:41,949][626795] Updated weights for policy 0, policy_version 295312 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:43,882][626795] Updated weights for policy 0, policy_version 295322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41779.4, 300 sec: 41904.2). Total num frames: 2419277824. Throughput: 0: 10448.1. Samples: 354796938. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:43,976][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:45,885][626795] Updated weights for policy 0, policy_version 295332 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:47,876][626795] Updated weights for policy 0, policy_version 295342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:48,976][24592] Fps is (10 sec: 42598.3, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2419490816. Throughput: 0: 10412.2. Samples: 354858930. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:48,978][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:49,916][626795] Updated weights for policy 0, policy_version 295352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:51,859][626795] Updated weights for policy 0, policy_version 295362 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:53,760][626795] Updated weights for policy 0, policy_version 295372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:53,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41782.6, 300 sec: 41904.1). Total num frames: 2419695616. Throughput: 0: 10422.1. Samples: 354921978. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:53,978][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:55,714][626795] Updated weights for policy 0, policy_version 295382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:57,704][626795] Updated weights for policy 0, policy_version 295392 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:58,976][24592] Fps is (10 sec: 40959.7, 60 sec: 41642.5, 300 sec: 41904.1). Total num frames: 2419900416. Throughput: 0: 10409.8. Samples: 354952620. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:37:58,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:37:59,660][626795] Updated weights for policy 0, policy_version 295402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:01,573][626795] Updated weights for policy 0, policy_version 295412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:03,527][626795] Updated weights for policy 0, policy_version 295422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2420113408. Throughput: 0: 10443.6. Samples: 355016040. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:03,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:05,414][626795] Updated weights for policy 0, policy_version 295432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:07,380][626795] Updated weights for policy 0, policy_version 295442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:08,976][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.1, 300 sec: 41904.1). Total num frames: 2420326400. Throughput: 0: 10457.2. Samples: 355079346. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:08,978][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:09,298][626795] Updated weights for policy 0, policy_version 295452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:11,351][626795] Updated weights for policy 0, policy_version 295462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:13,128][626795] Updated weights for policy 0, policy_version 295472 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:13,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41915.9, 300 sec: 41931.9). Total num frames: 2420539392. Throughput: 0: 10472.1. Samples: 355110816. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:13,976][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:15,205][626795] Updated weights for policy 0, policy_version 295482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:17,176][626795] Updated weights for policy 0, policy_version 295492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:18,976][24592] Fps is (10 sec: 41779.3, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2420744192. Throughput: 0: 10474.9. Samples: 355172904. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:18,976][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:19,189][626795] Updated weights for policy 0, policy_version 295502 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:21,152][626795] Updated weights for policy 0, policy_version 295512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:23,119][626795] Updated weights for policy 0, policy_version 295522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:23,975][24592] Fps is (10 sec: 40959.5, 60 sec: 41779.1, 300 sec: 41904.2). Total num frames: 2420948992. Throughput: 0: 10434.5. Samples: 355234944. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:23,976][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:25,233][626795] Updated weights for policy 0, policy_version 295532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:27,164][626795] Updated weights for policy 0, policy_version 295542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:28,976][24592] Fps is (10 sec: 40959.3, 60 sec: 41779.0, 300 sec: 41876.4). Total num frames: 2421153792. Throughput: 0: 10413.9. Samples: 355265568. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:28,977][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:28,988][626795] Updated weights for policy 0, policy_version 295552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:31,060][626795] Updated weights for policy 0, policy_version 295562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:32,939][626795] Updated weights for policy 0, policy_version 295572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:33,982][24592] Fps is (10 sec: 41751.4, 60 sec: 41911.0, 300 sec: 41875.4). Total num frames: 2421366784. Throughput: 0: 10439.7. Samples: 355328784. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:33,984][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:35,004][626795] Updated weights for policy 0, policy_version 295582 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:36,947][626795] Updated weights for policy 0, policy_version 295592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:38,880][626795] Updated weights for policy 0, policy_version 295602 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:38,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41779.3, 300 sec: 41876.6). Total num frames: 2421571584. Throughput: 0: 10435.8. Samples: 355391586. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:38,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:40,740][626795] Updated weights for policy 0, policy_version 295612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:42,727][626795] Updated weights for policy 0, policy_version 295622 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:43,975][24592] Fps is (10 sec: 41807.4, 60 sec: 41779.2, 300 sec: 41876.4). Total num frames: 2421784576. Throughput: 0: 10470.2. Samples: 355423776. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:43,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:44,590][626795] Updated weights for policy 0, policy_version 295632 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:46,538][626795] Updated weights for policy 0, policy_version 295642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:48,504][626795] Updated weights for policy 0, policy_version 295652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:48,976][24592] Fps is (10 sec: 42597.4, 60 sec: 41779.2, 300 sec: 41876.4). Total num frames: 2421997568. Throughput: 0: 10466.1. Samples: 355487016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:48,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:50,563][626795] Updated weights for policy 0, policy_version 295662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:52,598][626795] Updated weights for policy 0, policy_version 295672 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:53,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41779.2, 300 sec: 41848.6). Total num frames: 2422202368. Throughput: 0: 10423.1. Samples: 355548384. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:53,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:54,535][626795] Updated weights for policy 0, policy_version 295682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:56,561][626795] Updated weights for policy 0, policy_version 295692 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:38:58,503][626795] Updated weights for policy 0, policy_version 295702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:58,983][24592] Fps is (10 sec: 40929.5, 60 sec: 41774.0, 300 sec: 41847.6). Total num frames: 2422407168. Throughput: 0: 10394.8. Samples: 355578660. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:38:58,984][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:00,490][626795] Updated weights for policy 0, policy_version 295712 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:02,438][626795] Updated weights for policy 0, policy_version 295722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:03,975][24592] Fps is (10 sec: 40960.6, 60 sec: 41642.7, 300 sec: 41820.9). Total num frames: 2422611968. Throughput: 0: 10404.3. Samples: 355641096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:03,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:04,017][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000295730_2422620160.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:04,111][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000294505_2412584960.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:04,540][626795] Updated weights for policy 0, policy_version 295732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:06,444][626795] Updated weights for policy 0, policy_version 295742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:08,326][626795] Updated weights for policy 0, policy_version 295752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:08,975][24592] Fps is (10 sec: 40991.4, 60 sec: 41506.3, 300 sec: 41820.9). Total num frames: 2422816768. Throughput: 0: 10410.4. Samples: 355703412. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:08,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:10,453][626795] Updated weights for policy 0, policy_version 295762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:12,327][626795] Updated weights for policy 0, policy_version 295772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:13,976][24592] Fps is (10 sec: 42594.7, 60 sec: 41642.0, 300 sec: 41848.8). Total num frames: 2423037952. Throughput: 0: 10433.5. Samples: 355735080. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:13,978][24592] Avg episode reward: [(0, '4.889')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:14,361][626795] Updated weights for policy 0, policy_version 295782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:16,144][626795] Updated weights for policy 0, policy_version 295792 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:18,176][626795] Updated weights for policy 0, policy_version 295802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:18,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41642.8, 300 sec: 41820.8). Total num frames: 2423242752. Throughput: 0: 10427.7. Samples: 355797960. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:18,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:20,099][626795] Updated weights for policy 0, policy_version 295812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:22,090][626795] Updated weights for policy 0, policy_version 295822 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:23,975][24592] Fps is (10 sec: 40963.1, 60 sec: 41642.7, 300 sec: 41793.1). Total num frames: 2423447552. Throughput: 0: 10419.4. Samples: 355860462. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:23,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:24,133][626795] Updated weights for policy 0, policy_version 295832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:26,127][626795] Updated weights for policy 0, policy_version 295842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:28,040][626795] Updated weights for policy 0, policy_version 295852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:28,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41642.9, 300 sec: 41793.1). Total num frames: 2423652352. Throughput: 0: 10370.5. Samples: 355890450. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:28,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:30,110][626795] Updated weights for policy 0, policy_version 295862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:32,029][626795] Updated weights for policy 0, policy_version 295872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:33,976][24592] Fps is (10 sec: 40957.1, 60 sec: 41510.2, 300 sec: 41793.0). Total num frames: 2423857152. Throughput: 0: 10343.9. Samples: 355952496. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:33,978][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:33,999][626795] Updated weights for policy 0, policy_version 295882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:35,960][626795] Updated weights for policy 0, policy_version 295892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:37,958][626795] Updated weights for policy 0, policy_version 295902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.7, 300 sec: 41793.3). Total num frames: 2424070144. Throughput: 0: 10389.5. Samples: 356015910. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:38,976][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:39,968][626795] Updated weights for policy 0, policy_version 295912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:41,765][626795] Updated weights for policy 0, policy_version 295922 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:43,711][626795] Updated weights for policy 0, policy_version 295932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:43,975][24592] Fps is (10 sec: 42601.8, 60 sec: 41642.6, 300 sec: 41820.8). Total num frames: 2424283136. Throughput: 0: 10415.8. Samples: 356047290. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:43,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:45,825][626795] Updated weights for policy 0, policy_version 295942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:47,657][626795] Updated weights for policy 0, policy_version 295952 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.3, 300 sec: 41793.1). Total num frames: 2424487936. Throughput: 0: 10423.2. Samples: 356110140. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:48,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:49,606][626795] Updated weights for policy 0, policy_version 295962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:51,578][626795] Updated weights for policy 0, policy_version 295972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:53,554][626795] Updated weights for policy 0, policy_version 295982 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:53,977][24592] Fps is (10 sec: 41772.6, 60 sec: 41641.6, 300 sec: 41792.9). Total num frames: 2424700928. Throughput: 0: 10450.3. Samples: 356173692. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:53,979][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:55,555][626795] Updated weights for policy 0, policy_version 295992 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:57,608][626795] Updated weights for policy 0, policy_version 296002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:58,976][24592] Fps is (10 sec: 40959.5, 60 sec: 41511.4, 300 sec: 41738.3). Total num frames: 2424897536. Throughput: 0: 10395.8. Samples: 356202882. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:39:58,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:39:59,637][626795] Updated weights for policy 0, policy_version 296012 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:01,569][626795] Updated weights for policy 0, policy_version 296022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:03,486][626795] Updated weights for policy 0, policy_version 296032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:03,976][24592] Fps is (10 sec: 40145.9, 60 sec: 41505.9, 300 sec: 41737.5). Total num frames: 2425102336. Throughput: 0: 10382.7. Samples: 356265186. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:03,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:05,928][626795] Updated weights for policy 0, policy_version 296042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:07,935][626795] Updated weights for policy 0, policy_version 296052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:08,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41369.6, 300 sec: 41682.0). Total num frames: 2425298944. Throughput: 0: 10268.7. Samples: 356322552. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:08,977][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:10,102][626795] Updated weights for policy 0, policy_version 296062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:11,897][626795] Updated weights for policy 0, policy_version 296072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:13,855][626795] Updated weights for policy 0, policy_version 296082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:13,975][24592] Fps is (10 sec: 40142.2, 60 sec: 41097.1, 300 sec: 41682.1). Total num frames: 2425503744. Throughput: 0: 10287.9. Samples: 356353404. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:13,977][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:15,923][626795] Updated weights for policy 0, policy_version 296092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:17,816][626795] Updated weights for policy 0, policy_version 296102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:18,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.1, 300 sec: 41682.0). Total num frames: 2425716736. Throughput: 0: 10302.2. Samples: 356416086. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:18,976][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:19,788][626795] Updated weights for policy 0, policy_version 296112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:21,665][626795] Updated weights for policy 0, policy_version 296122 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:23,633][626795] Updated weights for policy 0, policy_version 296132 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:23,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41233.2, 300 sec: 41654.2). Total num frames: 2425921536. Throughput: 0: 10299.7. Samples: 356479398. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:23,977][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:25,977][626795] Updated weights for policy 0, policy_version 296142 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:28,203][626795] Updated weights for policy 0, policy_version 296152 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:28,976][24592] Fps is (10 sec: 38501.0, 60 sec: 40823.2, 300 sec: 41543.1). Total num frames: 2426101760. Throughput: 0: 10177.9. Samples: 356505300. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:28,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:30,232][626795] Updated weights for policy 0, policy_version 296162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:32,377][626795] Updated weights for policy 0, policy_version 296172 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:33,975][24592] Fps is (10 sec: 38502.0, 60 sec: 40824.0, 300 sec: 41543.2). Total num frames: 2426306560. Throughput: 0: 10104.5. Samples: 356564844. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:33,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:34,273][626795] Updated weights for policy 0, policy_version 296182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:36,249][626795] Updated weights for policy 0, policy_version 296192 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:38,383][626795] Updated weights for policy 0, policy_version 296202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:38,976][24592] Fps is (10 sec: 40141.1, 60 sec: 40550.2, 300 sec: 41487.6). Total num frames: 2426503168. Throughput: 0: 10048.3. Samples: 356625852. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:38,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:40,415][626795] Updated weights for policy 0, policy_version 296212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:42,277][626795] Updated weights for policy 0, policy_version 296222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:43,976][24592] Fps is (10 sec: 40957.6, 60 sec: 40550.0, 300 sec: 41515.4). Total num frames: 2426716160. Throughput: 0: 10075.2. Samples: 356656272. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:43,979][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:44,418][626795] Updated weights for policy 0, policy_version 296232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:46,366][626795] Updated weights for policy 0, policy_version 296242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:48,156][626795] Updated weights for policy 0, policy_version 296252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:48,975][24592] Fps is (10 sec: 41780.2, 60 sec: 40550.4, 300 sec: 41515.4). Total num frames: 2426920960. Throughput: 0: 10083.7. Samples: 356718948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:48,979][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:50,321][626795] Updated weights for policy 0, policy_version 296262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:52,316][626795] Updated weights for policy 0, policy_version 296272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:53,976][24592] Fps is (10 sec: 40142.3, 60 sec: 40278.2, 300 sec: 41459.8). Total num frames: 2427117568. Throughput: 0: 10131.8. Samples: 356778486. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:53,977][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:54,495][626795] Updated weights for policy 0, policy_version 296282 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:56,969][626795] Updated weights for policy 0, policy_version 296292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:58,975][24592] Fps is (10 sec: 36864.2, 60 sec: 39867.8, 300 sec: 41348.8). Total num frames: 2427289600. Throughput: 0: 10050.8. Samples: 356805690. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:40:58,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:40:59,498][626795] Updated weights for policy 0, policy_version 296302 (0.0042)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:01,514][626795] Updated weights for policy 0, policy_version 296312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:03,685][626795] Updated weights for policy 0, policy_version 296322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:03,975][24592] Fps is (10 sec: 36045.5, 60 sec: 39594.8, 300 sec: 41265.5). Total num frames: 2427478016. Throughput: 0: 9802.4. Samples: 356857194. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:03,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000296323_2427478016.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:04,125][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000295119_2417614848.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:06,060][626795] Updated weights for policy 0, policy_version 296332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:08,162][626795] Updated weights for policy 0, policy_version 296342 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:08,975][24592] Fps is (10 sec: 36863.9, 60 sec: 39321.6, 300 sec: 41154.4). Total num frames: 2427658240. Throughput: 0: 9668.5. Samples: 356914482. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:08,977][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:10,384][626795] Updated weights for policy 0, policy_version 296352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:12,940][626795] Updated weights for policy 0, policy_version 296362 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:13,977][24592] Fps is (10 sec: 35219.6, 60 sec: 38774.3, 300 sec: 41043.1). Total num frames: 2427830272. Throughput: 0: 9672.9. Samples: 356940594. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:13,980][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:15,384][626795] Updated weights for policy 0, policy_version 296372 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:17,329][626795] Updated weights for policy 0, policy_version 296382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:18,976][24592] Fps is (10 sec: 36044.1, 60 sec: 38365.7, 300 sec: 40960.0). Total num frames: 2428018688. Throughput: 0: 9539.7. Samples: 356994132. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:18,978][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:19,580][626795] Updated weights for policy 0, policy_version 296392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:21,967][626795] Updated weights for policy 0, policy_version 296402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:23,975][24592] Fps is (10 sec: 36051.4, 60 sec: 37819.7, 300 sec: 40821.3). Total num frames: 2428190720. Throughput: 0: 9331.7. Samples: 357045774. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:23,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:24,416][626795] Updated weights for policy 0, policy_version 296412 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:26,574][626795] Updated weights for policy 0, policy_version 296422 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:28,678][626795] Updated weights for policy 0, policy_version 296432 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:28,976][24592] Fps is (10 sec: 36042.3, 60 sec: 37955.9, 300 sec: 40765.5). Total num frames: 2428379136. Throughput: 0: 9277.4. Samples: 357073758. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:28,977][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:30,878][626795] Updated weights for policy 0, policy_version 296442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:33,104][626795] Updated weights for policy 0, policy_version 296452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:33,976][24592] Fps is (10 sec: 37682.4, 60 sec: 37683.1, 300 sec: 40682.3). Total num frames: 2428567552. Throughput: 0: 9137.0. Samples: 357130116. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:33,980][24592] Avg episode reward: [(0, '4.317')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:35,224][626795] Updated weights for policy 0, policy_version 296462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:37,467][626795] Updated weights for policy 0, policy_version 296472 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:38,975][24592] Fps is (10 sec: 36867.2, 60 sec: 37410.3, 300 sec: 40599.0). Total num frames: 2428747776. Throughput: 0: 9083.9. Samples: 357187260. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:38,977][24592] Avg episode reward: [(0, '4.950')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:39,729][626795] Updated weights for policy 0, policy_version 296482 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:41,737][626795] Updated weights for policy 0, policy_version 296492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:43,978][24592] Fps is (10 sec: 37675.7, 60 sec: 37136.1, 300 sec: 40543.2). Total num frames: 2428944384. Throughput: 0: 9097.2. Samples: 357215082. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:43,979][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:43,981][626795] Updated weights for policy 0, policy_version 296502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:46,067][626795] Updated weights for policy 0, policy_version 296512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:48,190][626795] Updated weights for policy 0, policy_version 296522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:48,975][24592] Fps is (10 sec: 38502.7, 60 sec: 36864.1, 300 sec: 40488.6). Total num frames: 2429132800. Throughput: 0: 9212.0. Samples: 357271734. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:48,976][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:50,924][626795] Updated weights for policy 0, policy_version 296532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:53,018][626795] Updated weights for policy 0, policy_version 296542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:53,975][24592] Fps is (10 sec: 36052.2, 60 sec: 36454.5, 300 sec: 40349.1). Total num frames: 2429304832. Throughput: 0: 9109.8. Samples: 357324426. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:53,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:55,417][626795] Updated weights for policy 0, policy_version 296552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:57,612][626795] Updated weights for policy 0, policy_version 296562 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:58,975][24592] Fps is (10 sec: 35225.3, 60 sec: 36590.9, 300 sec: 40265.8). Total num frames: 2429485056. Throughput: 0: 9093.7. Samples: 357349794. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:41:58,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:41:59,802][626795] Updated weights for policy 0, policy_version 296572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:01,890][626795] Updated weights for policy 0, policy_version 296582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:03,927][626795] Updated weights for policy 0, policy_version 296592 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:03,976][24592] Fps is (10 sec: 37683.0, 60 sec: 36727.4, 300 sec: 40210.2). Total num frames: 2429681664. Throughput: 0: 9180.8. Samples: 357407268. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:03,976][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:06,079][626795] Updated weights for policy 0, policy_version 296602 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:08,975][24592] Fps is (10 sec: 35225.5, 60 sec: 36317.8, 300 sec: 40043.6). Total num frames: 2429837312. Throughput: 0: 9196.8. Samples: 357459630. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:08,978][24592] Avg episode reward: [(0, '4.935')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:09,004][626795] Updated weights for policy 0, policy_version 296612 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:11,784][626795] Updated weights for policy 0, policy_version 296622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:13,976][24592] Fps is (10 sec: 31948.6, 60 sec: 36182.3, 300 sec: 39904.7). Total num frames: 2430001152. Throughput: 0: 9078.1. Samples: 357482268. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:13,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:14,243][626795] Updated weights for policy 0, policy_version 296632 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:16,290][626795] Updated weights for policy 0, policy_version 296642 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:18,433][626795] Updated weights for policy 0, policy_version 296652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:18,975][24592] Fps is (10 sec: 35225.7, 60 sec: 36181.5, 300 sec: 39821.5). Total num frames: 2430189568. Throughput: 0: 9042.0. Samples: 357537006. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:18,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:20,388][626795] Updated weights for policy 0, policy_version 296662 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:22,379][626795] Updated weights for policy 0, policy_version 296672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:23,976][24592] Fps is (10 sec: 39321.9, 60 sec: 36727.3, 300 sec: 39821.4). Total num frames: 2430394368. Throughput: 0: 9097.2. Samples: 357596634. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:23,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:24,594][626795] Updated weights for policy 0, policy_version 296682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:26,935][626795] Updated weights for policy 0, policy_version 296692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:28,976][24592] Fps is (10 sec: 36860.5, 60 sec: 36317.8, 300 sec: 39682.5). Total num frames: 2430558208. Throughput: 0: 9069.3. Samples: 357623190. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:28,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:29,701][626795] Updated weights for policy 0, policy_version 296702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:32,227][626795] Updated weights for policy 0, policy_version 296712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:33,978][24592] Fps is (10 sec: 33578.7, 60 sec: 36043.3, 300 sec: 39543.4). Total num frames: 2430730240. Throughput: 0: 8856.0. Samples: 357670278. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:33,979][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:34,484][626795] Updated weights for policy 0, policy_version 296722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:36,591][626795] Updated weights for policy 0, policy_version 296732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:38,753][626795] Updated weights for policy 0, policy_version 296742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:38,976][24592] Fps is (10 sec: 35228.4, 60 sec: 36044.7, 300 sec: 39432.7). Total num frames: 2430910464. Throughput: 0: 8948.7. Samples: 357727116. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:38,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:40,983][626795] Updated weights for policy 0, policy_version 296752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:43,027][626795] Updated weights for policy 0, policy_version 296762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:43,975][24592] Fps is (10 sec: 37693.0, 60 sec: 36046.1, 300 sec: 39377.1). Total num frames: 2431107072. Throughput: 0: 9008.8. Samples: 357755190. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:43,979][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:45,152][626795] Updated weights for policy 0, policy_version 296772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:47,178][626795] Updated weights for policy 0, policy_version 296782 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:48,975][24592] Fps is (10 sec: 38503.2, 60 sec: 36044.8, 300 sec: 39321.6). Total num frames: 2431295488. Throughput: 0: 9051.6. Samples: 357814590. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:48,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:49,295][626795] Updated weights for policy 0, policy_version 296792 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:51,330][626795] Updated weights for policy 0, policy_version 296802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:53,450][626795] Updated weights for policy 0, policy_version 296812 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:53,976][24592] Fps is (10 sec: 39320.9, 60 sec: 36590.8, 300 sec: 39321.6). Total num frames: 2431500288. Throughput: 0: 9215.3. Samples: 357874320. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:53,977][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:55,483][626795] Updated weights for policy 0, policy_version 296822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:57,553][626795] Updated weights for policy 0, policy_version 296832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 37000.6, 300 sec: 39293.8). Total num frames: 2431705088. Throughput: 0: 9345.9. Samples: 357902832. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:42:58,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:42:59,718][626795] Updated weights for policy 0, policy_version 296842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:01,616][626795] Updated weights for policy 0, policy_version 296852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:03,650][626795] Updated weights for policy 0, policy_version 296862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:03,975][24592] Fps is (10 sec: 40141.7, 60 sec: 37000.6, 300 sec: 39238.3). Total num frames: 2431901696. Throughput: 0: 9485.5. Samples: 357963852. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:03,976][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:03,996][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000296863_2431901696.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:04,070][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000295730_2422620160.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:05,858][626795] Updated weights for policy 0, policy_version 296872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:07,710][626795] Updated weights for policy 0, policy_version 296882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:08,976][24592] Fps is (10 sec: 39319.5, 60 sec: 37682.9, 300 sec: 39182.7). Total num frames: 2432098304. Throughput: 0: 9483.9. Samples: 358023414. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:08,979][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:09,833][626795] Updated weights for policy 0, policy_version 296892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:11,849][626795] Updated weights for policy 0, policy_version 296902 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:13,799][626795] Updated weights for policy 0, policy_version 296912 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:13,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38366.0, 300 sec: 39182.8). Total num frames: 2432303104. Throughput: 0: 9569.3. Samples: 358053798. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:13,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:15,883][626795] Updated weights for policy 0, policy_version 296922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:17,775][626795] Updated weights for policy 0, policy_version 296932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:18,975][24592] Fps is (10 sec: 40962.0, 60 sec: 38638.9, 300 sec: 39182.8). Total num frames: 2432507904. Throughput: 0: 9903.0. Samples: 358115886. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:18,977][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:19,854][626795] Updated weights for policy 0, policy_version 296942 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:21,732][626795] Updated weights for policy 0, policy_version 296952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:23,806][626795] Updated weights for policy 0, policy_version 296962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:23,976][24592] Fps is (10 sec: 40959.6, 60 sec: 38638.9, 300 sec: 39182.8). Total num frames: 2432712704. Throughput: 0: 10002.5. Samples: 358177230. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:23,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:25,984][626795] Updated weights for policy 0, policy_version 296972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:28,644][626795] Updated weights for policy 0, policy_version 296982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:28,975][24592] Fps is (10 sec: 37683.4, 60 sec: 38776.1, 300 sec: 39044.8). Total num frames: 2432884736. Throughput: 0: 9996.2. Samples: 358205016. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:28,978][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:30,962][626795] Updated weights for policy 0, policy_version 296992 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:32,897][626795] Updated weights for policy 0, policy_version 297002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:33,975][24592] Fps is (10 sec: 35226.1, 60 sec: 38913.7, 300 sec: 38960.6). Total num frames: 2433064960. Throughput: 0: 9840.7. Samples: 358257420. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:33,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:35,687][626795] Updated weights for policy 0, policy_version 297012 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:38,488][626795] Updated weights for policy 0, policy_version 297022 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:38,976][24592] Fps is (10 sec: 32767.0, 60 sec: 38365.8, 300 sec: 38738.4). Total num frames: 2433212416. Throughput: 0: 9542.5. Samples: 358303734. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:38,978][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:41,278][626795] Updated weights for policy 0, policy_version 297032 (0.0037)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:43,445][626795] Updated weights for policy 0, policy_version 297042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:43,975][24592] Fps is (10 sec: 31948.6, 60 sec: 37956.3, 300 sec: 38599.6). Total num frames: 2433384448. Throughput: 0: 9406.5. Samples: 358326126. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:43,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:45,604][626795] Updated weights for policy 0, policy_version 297052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:47,731][626795] Updated weights for policy 0, policy_version 297062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:48,977][24592] Fps is (10 sec: 36860.9, 60 sec: 38092.1, 300 sec: 38571.7). Total num frames: 2433581056. Throughput: 0: 9326.6. Samples: 358383558. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:48,979][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:49,831][626795] Updated weights for policy 0, policy_version 297072 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:52,000][626795] Updated weights for policy 0, policy_version 297082 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:53,975][24592] Fps is (10 sec: 38502.6, 60 sec: 37819.9, 300 sec: 38517.3). Total num frames: 2433769472. Throughput: 0: 9261.8. Samples: 358440192. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:53,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:54,064][626795] Updated weights for policy 0, policy_version 297092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:56,190][626795] Updated weights for policy 0, policy_version 297102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:43:58,278][626795] Updated weights for policy 0, policy_version 297112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:58,975][24592] Fps is (10 sec: 37687.0, 60 sec: 37546.6, 300 sec: 38460.7). Total num frames: 2433957888. Throughput: 0: 9240.1. Samples: 358469604. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:43:58,976][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:00,502][626795] Updated weights for policy 0, policy_version 297122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:02,380][626795] Updated weights for policy 0, policy_version 297132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:03,975][24592] Fps is (10 sec: 39321.7, 60 sec: 37683.2, 300 sec: 38460.7). Total num frames: 2434162688. Throughput: 0: 9181.3. Samples: 358529046. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:03,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:04,743][626795] Updated weights for policy 0, policy_version 297142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:06,656][626795] Updated weights for policy 0, policy_version 297152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:08,835][626795] Updated weights for policy 0, policy_version 297162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:08,976][24592] Fps is (10 sec: 39321.3, 60 sec: 37546.9, 300 sec: 38349.8). Total num frames: 2434351104. Throughput: 0: 9110.8. Samples: 358587216. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:08,979][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:10,876][626795] Updated weights for policy 0, policy_version 297172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:13,098][626795] Updated weights for policy 0, policy_version 297182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:13,975][24592] Fps is (10 sec: 38502.0, 60 sec: 37410.1, 300 sec: 38321.9). Total num frames: 2434547712. Throughput: 0: 9120.2. Samples: 358615428. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:13,976][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:15,172][626795] Updated weights for policy 0, policy_version 297192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:17,277][626795] Updated weights for policy 0, policy_version 297202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:18,976][24592] Fps is (10 sec: 40140.5, 60 sec: 37410.0, 300 sec: 38321.9). Total num frames: 2434752512. Throughput: 0: 9295.0. Samples: 358675698. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:18,977][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:19,158][626795] Updated weights for policy 0, policy_version 297212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:21,183][626795] Updated weights for policy 0, policy_version 297222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:23,083][626795] Updated weights for policy 0, policy_version 297232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:23,975][24592] Fps is (10 sec: 40960.3, 60 sec: 37410.2, 300 sec: 38321.9). Total num frames: 2434957312. Throughput: 0: 9634.2. Samples: 358737270. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:23,977][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:25,010][626772] Signal inference workers to stop experience collection... (4650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:25,016][626772] Signal inference workers to resume experience collection... (4650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:25,024][626795] InferenceWorker_p0-w0: stopping experience collection (4650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:25,027][626795] InferenceWorker_p0-w0: resuming experience collection (4650 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:25,062][626795] Updated weights for policy 0, policy_version 297242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:27,094][626795] Updated weights for policy 0, policy_version 297252 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:28,976][24592] Fps is (10 sec: 40959.9, 60 sec: 37956.1, 300 sec: 38322.0). Total num frames: 2435162112. Throughput: 0: 9826.1. Samples: 358768302. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:28,978][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:29,151][626795] Updated weights for policy 0, policy_version 297262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:30,955][626795] Updated weights for policy 0, policy_version 297272 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:32,971][626795] Updated weights for policy 0, policy_version 297282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 38365.9, 300 sec: 38294.1). Total num frames: 2435366912. Throughput: 0: 9943.1. Samples: 358830984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:33,976][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:35,029][626795] Updated weights for policy 0, policy_version 297292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:36,982][626795] Updated weights for policy 0, policy_version 297302 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:38,880][626795] Updated weights for policy 0, policy_version 297312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:38,975][24592] Fps is (10 sec: 41780.5, 60 sec: 39458.3, 300 sec: 38294.1). Total num frames: 2435579904. Throughput: 0: 10073.9. Samples: 358893516. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:38,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:40,930][626795] Updated weights for policy 0, policy_version 297322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:42,899][626795] Updated weights for policy 0, policy_version 297332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:43,976][24592] Fps is (10 sec: 40958.2, 60 sec: 39867.5, 300 sec: 38266.3). Total num frames: 2435776512. Throughput: 0: 10097.1. Samples: 358923978. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:43,978][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:45,148][626795] Updated weights for policy 0, policy_version 297342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:47,111][626795] Updated weights for policy 0, policy_version 297352 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:48,975][24592] Fps is (10 sec: 39321.1, 60 sec: 39868.4, 300 sec: 38211.0). Total num frames: 2435973120. Throughput: 0: 10086.5. Samples: 358982940. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:48,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:49,280][626795] Updated weights for policy 0, policy_version 297362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:51,314][626795] Updated weights for policy 0, policy_version 297372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:53,443][626795] Updated weights for policy 0, policy_version 297382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:53,975][24592] Fps is (10 sec: 39323.0, 60 sec: 40004.2, 300 sec: 38210.8). Total num frames: 2436169728. Throughput: 0: 10090.7. Samples: 359041296. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:53,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:55,447][626795] Updated weights for policy 0, policy_version 297392 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:57,392][626795] Updated weights for policy 0, policy_version 297402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:58,981][24592] Fps is (10 sec: 40120.1, 60 sec: 40273.9, 300 sec: 38210.2). Total num frames: 2436374528. Throughput: 0: 10135.8. Samples: 359071590. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:44:58,982][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:44:59,607][626795] Updated weights for policy 0, policy_version 297412 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:01,659][626795] Updated weights for policy 0, policy_version 297422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:03,719][626795] Updated weights for policy 0, policy_version 297432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:03,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40140.7, 300 sec: 38210.8). Total num frames: 2436571136. Throughput: 0: 10120.6. Samples: 359131122. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:03,976][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000297433_2436571136.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:04,105][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000296323_2427478016.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:05,715][626795] Updated weights for policy 0, policy_version 297442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:07,831][626795] Updated weights for policy 0, policy_version 297452 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:08,983][24592] Fps is (10 sec: 37673.7, 60 sec: 39999.2, 300 sec: 38126.5). Total num frames: 2436751360. Throughput: 0: 10032.3. Samples: 359188800. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:09,000][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:10,991][626795] Updated weights for policy 0, policy_version 297462 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:13,156][626795] Updated weights for policy 0, policy_version 297472 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:13,976][24592] Fps is (10 sec: 34405.7, 60 sec: 39458.0, 300 sec: 37960.9). Total num frames: 2436915200. Throughput: 0: 9761.1. Samples: 359207550. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:13,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:15,211][626795] Updated weights for policy 0, policy_version 297482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:17,334][626795] Updated weights for policy 0, policy_version 297492 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:18,976][24592] Fps is (10 sec: 36071.6, 60 sec: 39321.6, 300 sec: 37933.1). Total num frames: 2437111808. Throughput: 0: 9699.1. Samples: 359267448. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:18,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:19,575][626795] Updated weights for policy 0, policy_version 297502 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:21,764][626795] Updated weights for policy 0, policy_version 297512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:23,938][626795] Updated weights for policy 0, policy_version 297522 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:23,975][24592] Fps is (10 sec: 38503.4, 60 sec: 39048.5, 300 sec: 37960.9). Total num frames: 2437300224. Throughput: 0: 9538.4. Samples: 359322744. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:23,977][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:26,175][626795] Updated weights for policy 0, policy_version 297532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:28,218][626795] Updated weights for policy 0, policy_version 297542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:28,977][24592] Fps is (10 sec: 37678.2, 60 sec: 38774.6, 300 sec: 37905.2). Total num frames: 2437488640. Throughput: 0: 9484.0. Samples: 359350770. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:28,978][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:30,543][626795] Updated weights for policy 0, policy_version 297552 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:32,863][626795] Updated weights for policy 0, policy_version 297562 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:33,975][24592] Fps is (10 sec: 36864.1, 60 sec: 38365.8, 300 sec: 37849.8). Total num frames: 2437668864. Throughput: 0: 9429.6. Samples: 359407272. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:33,976][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:35,068][626795] Updated weights for policy 0, policy_version 297572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:36,989][626795] Updated weights for policy 0, policy_version 297582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:38,933][626795] Updated weights for policy 0, policy_version 297592 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:38,975][24592] Fps is (10 sec: 38508.7, 60 sec: 38229.3, 300 sec: 37822.1). Total num frames: 2437873664. Throughput: 0: 9439.7. Samples: 359466084. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:38,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:41,141][626795] Updated weights for policy 0, policy_version 297602 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:43,047][626795] Updated weights for policy 0, policy_version 297612 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:43,976][24592] Fps is (10 sec: 40137.8, 60 sec: 38229.1, 300 sec: 37794.2). Total num frames: 2438070272. Throughput: 0: 9420.9. Samples: 359495490. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:43,978][24592] Avg episode reward: [(0, '4.822')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:45,286][626795] Updated weights for policy 0, policy_version 297622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:47,609][626795] Updated weights for policy 0, policy_version 297632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:48,976][24592] Fps is (10 sec: 37682.5, 60 sec: 37956.2, 300 sec: 37738.8). Total num frames: 2438250496. Throughput: 0: 9341.3. Samples: 359551482. Policy #0 lag: (min: 0.0, avg: 2.6, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:48,978][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:50,099][626795] Updated weights for policy 0, policy_version 297642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:52,149][626795] Updated weights for policy 0, policy_version 297652 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:53,976][24592] Fps is (10 sec: 35227.7, 60 sec: 37546.6, 300 sec: 37738.7). Total num frames: 2438422528. Throughput: 0: 9230.0. Samples: 359604078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:53,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:54,549][626795] Updated weights for policy 0, policy_version 297662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:56,592][626795] Updated weights for policy 0, policy_version 297672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:45:58,578][626795] Updated weights for policy 0, policy_version 297682 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:58,975][24592] Fps is (10 sec: 37683.9, 60 sec: 37550.0, 300 sec: 37794.3). Total num frames: 2438627328. Throughput: 0: 9477.7. Samples: 359634042. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:45:58,976][24592] Avg episode reward: [(0, '4.919')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:00,556][626795] Updated weights for policy 0, policy_version 297692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:02,577][626795] Updated weights for policy 0, policy_version 297702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:03,975][24592] Fps is (10 sec: 40141.2, 60 sec: 37546.7, 300 sec: 37849.8). Total num frames: 2438823936. Throughput: 0: 9501.0. Samples: 359694990. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:03,977][24592] Avg episode reward: [(0, '4.373')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:04,798][626795] Updated weights for policy 0, policy_version 297712 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:07,127][626795] Updated weights for policy 0, policy_version 297722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:08,976][24592] Fps is (10 sec: 37682.3, 60 sec: 37551.4, 300 sec: 37877.8). Total num frames: 2439004160. Throughput: 0: 9502.4. Samples: 359750352. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:08,979][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:09,241][626795] Updated weights for policy 0, policy_version 297732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:11,349][626795] Updated weights for policy 0, policy_version 297742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:13,358][626795] Updated weights for policy 0, policy_version 297752 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:13,976][24592] Fps is (10 sec: 38500.0, 60 sec: 38229.1, 300 sec: 37933.1). Total num frames: 2439208960. Throughput: 0: 9548.2. Samples: 359780430. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:13,978][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:15,467][626795] Updated weights for policy 0, policy_version 297762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:17,766][626795] Updated weights for policy 0, policy_version 297772 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:18,975][24592] Fps is (10 sec: 38503.2, 60 sec: 37956.4, 300 sec: 37960.9). Total num frames: 2439389184. Throughput: 0: 9549.2. Samples: 359836986. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:18,979][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:20,081][626795] Updated weights for policy 0, policy_version 297782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:22,022][626795] Updated weights for policy 0, policy_version 297792 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:23,976][24592] Fps is (10 sec: 37682.4, 60 sec: 38092.3, 300 sec: 37988.7). Total num frames: 2439585792. Throughput: 0: 9533.7. Samples: 359895108. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:23,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:24,042][626795] Updated weights for policy 0, policy_version 297802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:26,340][626795] Updated weights for policy 0, policy_version 297812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:28,472][626795] Updated weights for policy 0, policy_version 297822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:28,976][24592] Fps is (10 sec: 38500.8, 60 sec: 38093.6, 300 sec: 37988.6). Total num frames: 2439774208. Throughput: 0: 9500.6. Samples: 359923014. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:28,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:30,667][626795] Updated weights for policy 0, policy_version 297832 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:32,726][626795] Updated weights for policy 0, policy_version 297842 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:33,975][24592] Fps is (10 sec: 38505.8, 60 sec: 38365.9, 300 sec: 38044.2). Total num frames: 2439970816. Throughput: 0: 9528.7. Samples: 359980272. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:33,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:35,011][626795] Updated weights for policy 0, policy_version 297852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:37,005][626795] Updated weights for policy 0, policy_version 297862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:38,918][626795] Updated weights for policy 0, policy_version 297872 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:38,975][24592] Fps is (10 sec: 39323.1, 60 sec: 38229.3, 300 sec: 38044.5). Total num frames: 2440167424. Throughput: 0: 9680.6. Samples: 360039702. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:38,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:41,140][626795] Updated weights for policy 0, policy_version 297882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:43,362][626795] Updated weights for policy 0, policy_version 297892 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:43,975][24592] Fps is (10 sec: 38502.6, 60 sec: 38093.3, 300 sec: 38044.2). Total num frames: 2440355840. Throughput: 0: 9642.3. Samples: 360067944. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:43,978][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:45,607][626795] Updated weights for policy 0, policy_version 297902 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:47,444][626795] Updated weights for policy 0, policy_version 297912 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:48,975][24592] Fps is (10 sec: 38502.8, 60 sec: 38366.0, 300 sec: 38127.5). Total num frames: 2440552448. Throughput: 0: 9575.8. Samples: 360125898. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:48,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:49,536][626795] Updated weights for policy 0, policy_version 297922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:51,578][626795] Updated weights for policy 0, policy_version 297932 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:53,877][626795] Updated weights for policy 0, policy_version 297942 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:53,976][24592] Fps is (10 sec: 38501.5, 60 sec: 38638.9, 300 sec: 38155.3). Total num frames: 2440740864. Throughput: 0: 9652.1. Samples: 360184698. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:53,978][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:55,913][626795] Updated weights for policy 0, policy_version 297952 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:46:58,019][626795] Updated weights for policy 0, policy_version 297962 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:58,976][24592] Fps is (10 sec: 38501.6, 60 sec: 38502.3, 300 sec: 38155.3). Total num frames: 2440937472. Throughput: 0: 9636.8. Samples: 360214080. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:46:58,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:00,282][626795] Updated weights for policy 0, policy_version 297972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:02,311][626795] Updated weights for policy 0, policy_version 297982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:03,975][24592] Fps is (10 sec: 39321.9, 60 sec: 38502.4, 300 sec: 38294.1). Total num frames: 2441134080. Throughput: 0: 9650.2. Samples: 360271248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:03,979][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000297990_2441134080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:04,125][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000296863_2431901696.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:04,278][626795] Updated weights for policy 0, policy_version 297992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:06,461][626795] Updated weights for policy 0, policy_version 298002 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:08,592][626795] Updated weights for policy 0, policy_version 298012 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:08,975][24592] Fps is (10 sec: 39321.9, 60 sec: 38775.6, 300 sec: 38405.2). Total num frames: 2441330688. Throughput: 0: 9676.6. Samples: 360330546. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:08,977][24592] Avg episode reward: [(0, '4.906')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:10,813][626795] Updated weights for policy 0, policy_version 298022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:12,869][626795] Updated weights for policy 0, policy_version 298032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:13,975][24592] Fps is (10 sec: 39321.9, 60 sec: 38639.4, 300 sec: 38433.0). Total num frames: 2441527296. Throughput: 0: 9678.5. Samples: 360358542. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:13,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:14,839][626795] Updated weights for policy 0, policy_version 298042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:17,277][626795] Updated weights for policy 0, policy_version 298052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:18,975][24592] Fps is (10 sec: 37683.7, 60 sec: 38639.0, 300 sec: 38349.7). Total num frames: 2441707520. Throughput: 0: 9663.9. Samples: 360415146. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:18,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:19,396][626795] Updated weights for policy 0, policy_version 298062 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:21,510][626795] Updated weights for policy 0, policy_version 298072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:23,400][626795] Updated weights for policy 0, policy_version 298082 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:23,975][24592] Fps is (10 sec: 36864.1, 60 sec: 38503.0, 300 sec: 38433.1). Total num frames: 2441895936. Throughput: 0: 9653.6. Samples: 360474114. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:23,978][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:25,828][626795] Updated weights for policy 0, policy_version 298092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:28,047][626795] Updated weights for policy 0, policy_version 298102 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:28,975][24592] Fps is (10 sec: 36864.0, 60 sec: 38366.2, 300 sec: 38461.1). Total num frames: 2442076160. Throughput: 0: 9614.0. Samples: 360500574. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:28,978][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:30,588][626795] Updated weights for policy 0, policy_version 298112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:32,592][626795] Updated weights for policy 0, policy_version 298122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:33,975][24592] Fps is (10 sec: 36863.9, 60 sec: 38229.3, 300 sec: 38488.5). Total num frames: 2442264576. Throughput: 0: 9538.0. Samples: 360555108. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:33,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:34,765][626795] Updated weights for policy 0, policy_version 298132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:36,932][626795] Updated weights for policy 0, policy_version 298142 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:38,975][24592] Fps is (10 sec: 37682.6, 60 sec: 38092.8, 300 sec: 38460.7). Total num frames: 2442452992. Throughput: 0: 9498.3. Samples: 360612120. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:38,979][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:39,247][626795] Updated weights for policy 0, policy_version 298152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:41,872][626795] Updated weights for policy 0, policy_version 298162 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:43,975][24592] Fps is (10 sec: 34406.6, 60 sec: 37546.7, 300 sec: 38349.7). Total num frames: 2442608640. Throughput: 0: 9342.3. Samples: 360634482. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:43,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:44,350][626795] Updated weights for policy 0, policy_version 298172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:46,831][626795] Updated weights for policy 0, policy_version 298182 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:48,977][24592] Fps is (10 sec: 32762.6, 60 sec: 37136.0, 300 sec: 38238.4). Total num frames: 2442780672. Throughput: 0: 9185.1. Samples: 360684594. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:48,980][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:49,225][626795] Updated weights for policy 0, policy_version 298192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:51,458][626795] Updated weights for policy 0, policy_version 298202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:53,418][626795] Updated weights for policy 0, policy_version 298212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:53,976][24592] Fps is (10 sec: 36044.2, 60 sec: 37137.1, 300 sec: 38183.0). Total num frames: 2442969088. Throughput: 0: 9128.3. Samples: 360741318. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:53,979][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:55,527][626795] Updated weights for policy 0, policy_version 298222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:57,547][626795] Updated weights for policy 0, policy_version 298232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:58,976][24592] Fps is (10 sec: 38508.6, 60 sec: 37137.1, 300 sec: 38183.0). Total num frames: 2443165696. Throughput: 0: 9165.8. Samples: 360771006. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:47:58,979][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:47:59,711][626795] Updated weights for policy 0, policy_version 298242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:01,802][626795] Updated weights for policy 0, policy_version 298252 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:03,975][24592] Fps is (10 sec: 37683.5, 60 sec: 36864.0, 300 sec: 38127.6). Total num frames: 2443345920. Throughput: 0: 9153.8. Samples: 360827070. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:03,976][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:04,194][626795] Updated weights for policy 0, policy_version 298262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:06,653][626795] Updated weights for policy 0, policy_version 298272 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:08,972][626795] Updated weights for policy 0, policy_version 298282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:08,976][24592] Fps is (10 sec: 36044.7, 60 sec: 36590.9, 300 sec: 38044.2). Total num frames: 2443526144. Throughput: 0: 9006.4. Samples: 360879402. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:08,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:11,282][626795] Updated weights for policy 0, policy_version 298292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:13,461][626795] Updated weights for policy 0, policy_version 298302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:13,975][24592] Fps is (10 sec: 36045.1, 60 sec: 36317.9, 300 sec: 37960.9). Total num frames: 2443706368. Throughput: 0: 9010.3. Samples: 360906036. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:13,979][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:15,531][626795] Updated weights for policy 0, policy_version 298312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:17,437][626795] Updated weights for policy 0, policy_version 298322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:18,975][24592] Fps is (10 sec: 38503.0, 60 sec: 36727.4, 300 sec: 37960.9). Total num frames: 2443911168. Throughput: 0: 9129.6. Samples: 360965940. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:18,978][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:19,526][626795] Updated weights for policy 0, policy_version 298332 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:21,544][626795] Updated weights for policy 0, policy_version 298342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:23,541][626795] Updated weights for policy 0, policy_version 298352 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 37000.5, 300 sec: 38072.0). Total num frames: 2444115968. Throughput: 0: 9220.6. Samples: 361027044. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:23,978][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:25,655][626795] Updated weights for policy 0, policy_version 298362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:27,728][626795] Updated weights for policy 0, policy_version 298372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:28,975][24592] Fps is (10 sec: 40141.0, 60 sec: 37273.6, 300 sec: 38127.5). Total num frames: 2444312576. Throughput: 0: 9386.9. Samples: 361056894. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:28,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:29,773][626795] Updated weights for policy 0, policy_version 298382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:31,825][626795] Updated weights for policy 0, policy_version 298392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:33,882][626795] Updated weights for policy 0, policy_version 298402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:33,975][24592] Fps is (10 sec: 39321.4, 60 sec: 37410.1, 300 sec: 38294.2). Total num frames: 2444509184. Throughput: 0: 9573.2. Samples: 361115370. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:33,978][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:36,164][626795] Updated weights for policy 0, policy_version 298412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:38,170][626795] Updated weights for policy 0, policy_version 298422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:38,975][24592] Fps is (10 sec: 38502.1, 60 sec: 37410.2, 300 sec: 38349.7). Total num frames: 2444697600. Throughput: 0: 9590.3. Samples: 361172880. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:38,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:40,735][626795] Updated weights for policy 0, policy_version 298432 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:43,299][626795] Updated weights for policy 0, policy_version 298442 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:43,975][24592] Fps is (10 sec: 35225.8, 60 sec: 37546.7, 300 sec: 38238.7). Total num frames: 2444861440. Throughput: 0: 9487.1. Samples: 361197924. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:43,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:45,447][626795] Updated weights for policy 0, policy_version 298452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:47,536][626795] Updated weights for policy 0, policy_version 298462 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:48,975][24592] Fps is (10 sec: 34406.7, 60 sec: 37684.3, 300 sec: 38210.8). Total num frames: 2445041664. Throughput: 0: 9431.8. Samples: 361251498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:48,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:50,083][626795] Updated weights for policy 0, policy_version 298472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:52,424][626795] Updated weights for policy 0, policy_version 298482 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:53,976][24592] Fps is (10 sec: 33586.4, 60 sec: 37137.0, 300 sec: 38099.7). Total num frames: 2445197312. Throughput: 0: 9343.2. Samples: 361299846. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:53,977][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:55,500][626795] Updated weights for policy 0, policy_version 298492 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:48:57,857][626795] Updated weights for policy 0, policy_version 298502 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:58,976][24592] Fps is (10 sec: 32767.5, 60 sec: 36727.5, 300 sec: 37988.6). Total num frames: 2445369344. Throughput: 0: 9234.1. Samples: 361321572. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:48:58,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:00,121][626795] Updated weights for policy 0, policy_version 298512 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:02,592][626795] Updated weights for policy 0, policy_version 298522 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:03,976][24592] Fps is (10 sec: 33586.8, 60 sec: 36454.2, 300 sec: 37905.3). Total num frames: 2445533184. Throughput: 0: 9066.6. Samples: 361373940. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:03,979][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000298528_2445541376.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:04,078][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000297433_2436571136.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:04,907][626795] Updated weights for policy 0, policy_version 298532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:06,982][626795] Updated weights for policy 0, policy_version 298542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:08,975][24592] Fps is (10 sec: 36044.7, 60 sec: 36727.5, 300 sec: 37905.4). Total num frames: 2445729792. Throughput: 0: 8989.0. Samples: 361431552. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:08,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:09,032][626795] Updated weights for policy 0, policy_version 298552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:11,559][626795] Updated weights for policy 0, policy_version 298562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:13,975][24592] Fps is (10 sec: 35226.9, 60 sec: 36317.9, 300 sec: 37738.8). Total num frames: 2445885440. Throughput: 0: 8882.0. Samples: 361456584. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:13,978][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:14,319][626795] Updated weights for policy 0, policy_version 298572 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:17,105][626795] Updated weights for policy 0, policy_version 298582 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:18,977][24592] Fps is (10 sec: 31125.8, 60 sec: 35497.9, 300 sec: 37572.0). Total num frames: 2446041088. Throughput: 0: 8573.5. Samples: 361501188. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:18,978][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:19,803][626795] Updated weights for policy 0, policy_version 298592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:22,642][626795] Updated weights for policy 0, policy_version 298602 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:23,975][24592] Fps is (10 sec: 30310.2, 60 sec: 34542.9, 300 sec: 37377.8). Total num frames: 2446188544. Throughput: 0: 8303.2. Samples: 361546524. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:23,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:25,239][626795] Updated weights for policy 0, policy_version 298612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:27,744][626795] Updated weights for policy 0, policy_version 298622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:28,976][24592] Fps is (10 sec: 30313.3, 60 sec: 33860.0, 300 sec: 37211.1). Total num frames: 2446344192. Throughput: 0: 8256.5. Samples: 361569468. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:28,978][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:30,296][626795] Updated weights for policy 0, policy_version 298632 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:33,212][626795] Updated weights for policy 0, policy_version 298642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:33,975][24592] Fps is (10 sec: 31129.9, 60 sec: 33177.6, 300 sec: 37016.7). Total num frames: 2446499840. Throughput: 0: 8151.6. Samples: 361618320. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:33,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:35,785][626795] Updated weights for policy 0, policy_version 298652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:38,342][626795] Updated weights for policy 0, policy_version 298662 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:38,975][24592] Fps is (10 sec: 31130.8, 60 sec: 32631.5, 300 sec: 36877.9). Total num frames: 2446655488. Throughput: 0: 8075.5. Samples: 361663242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:38,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:41,225][626795] Updated weights for policy 0, policy_version 298672 (0.0045)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:43,658][626795] Updated weights for policy 0, policy_version 298682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:43,975][24592] Fps is (10 sec: 31129.6, 60 sec: 32494.9, 300 sec: 36739.0). Total num frames: 2446811136. Throughput: 0: 8071.1. Samples: 361684770. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:43,976][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:46,330][626795] Updated weights for policy 0, policy_version 298692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:48,975][24592] Fps is (10 sec: 29491.0, 60 sec: 31812.2, 300 sec: 36544.6). Total num frames: 2446950400. Throughput: 0: 7955.0. Samples: 361731912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:48,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:49,432][626795] Updated weights for policy 0, policy_version 298702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:52,223][626795] Updated weights for policy 0, policy_version 298712 (0.0043)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:53,976][24592] Fps is (10 sec: 28670.5, 60 sec: 31675.6, 300 sec: 36350.8). Total num frames: 2447097856. Throughput: 0: 7605.8. Samples: 361773816. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:53,978][24592] Avg episode reward: [(0, '4.966')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:54,901][626795] Updated weights for policy 0, policy_version 298722 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:57,390][626795] Updated weights for policy 0, policy_version 298732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:58,976][24592] Fps is (10 sec: 31948.6, 60 sec: 31675.7, 300 sec: 36267.0). Total num frames: 2447269888. Throughput: 0: 7576.4. Samples: 361797522. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:49:58,976][24592] Avg episode reward: [(0, '4.336')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:49:59,665][626795] Updated weights for policy 0, policy_version 298742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:01,982][626795] Updated weights for policy 0, policy_version 298752 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:03,975][24592] Fps is (10 sec: 35227.5, 60 sec: 31949.0, 300 sec: 36267.9). Total num frames: 2447450112. Throughput: 0: 7781.3. Samples: 361851336. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:03,978][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:04,265][626795] Updated weights for policy 0, policy_version 298762 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:06,555][626795] Updated weights for policy 0, policy_version 298772 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:08,868][626795] Updated weights for policy 0, policy_version 298782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:08,976][24592] Fps is (10 sec: 35223.0, 60 sec: 31538.8, 300 sec: 36294.7). Total num frames: 2447622144. Throughput: 0: 7960.1. Samples: 361904736. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:08,980][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:11,024][626795] Updated weights for policy 0, policy_version 298792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:13,118][626795] Updated weights for policy 0, policy_version 298802 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:13,976][24592] Fps is (10 sec: 36043.7, 60 sec: 32085.2, 300 sec: 36267.0). Total num frames: 2447810560. Throughput: 0: 8074.4. Samples: 361932816. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:13,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:15,269][626795] Updated weights for policy 0, policy_version 298812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:17,400][626795] Updated weights for policy 0, policy_version 298822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:18,975][24592] Fps is (10 sec: 38505.8, 60 sec: 32768.7, 300 sec: 36294.7). Total num frames: 2448007168. Throughput: 0: 8287.5. Samples: 361991256. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:18,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:19,725][626795] Updated weights for policy 0, policy_version 298832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:21,852][626795] Updated weights for policy 0, policy_version 298842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:23,885][626795] Updated weights for policy 0, policy_version 298852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:23,976][24592] Fps is (10 sec: 38502.8, 60 sec: 33450.6, 300 sec: 36294.9). Total num frames: 2448195584. Throughput: 0: 8535.0. Samples: 362047320. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:23,978][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:25,894][626795] Updated weights for policy 0, policy_version 298862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:27,970][626795] Updated weights for policy 0, policy_version 298872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:28,976][24592] Fps is (10 sec: 39320.3, 60 sec: 34269.9, 300 sec: 36378.0). Total num frames: 2448400384. Throughput: 0: 8711.9. Samples: 362076810. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:28,978][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:30,119][626795] Updated weights for policy 0, policy_version 298882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:32,202][626795] Updated weights for policy 0, policy_version 298892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:33,975][24592] Fps is (10 sec: 40141.0, 60 sec: 34952.5, 300 sec: 36350.2). Total num frames: 2448596992. Throughput: 0: 8996.5. Samples: 362136756. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:33,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:34,140][626795] Updated weights for policy 0, policy_version 298902 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:36,295][626795] Updated weights for policy 0, policy_version 298912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:38,229][626795] Updated weights for policy 0, policy_version 298922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:38,976][24592] Fps is (10 sec: 39320.6, 60 sec: 35634.9, 300 sec: 36350.3). Total num frames: 2448793600. Throughput: 0: 9383.6. Samples: 362196078. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:38,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:40,663][626795] Updated weights for policy 0, policy_version 298932 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:42,702][626795] Updated weights for policy 0, policy_version 298942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:43,975][24592] Fps is (10 sec: 37683.5, 60 sec: 36044.8, 300 sec: 36350.3). Total num frames: 2448973824. Throughput: 0: 9467.5. Samples: 362223558. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:43,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:44,806][626795] Updated weights for policy 0, policy_version 298952 (0.0037)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:46,988][626795] Updated weights for policy 0, policy_version 298962 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:48,975][24592] Fps is (10 sec: 37685.1, 60 sec: 37000.5, 300 sec: 36433.6). Total num frames: 2449170432. Throughput: 0: 9574.1. Samples: 362282172. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:48,976][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:49,104][626795] Updated weights for policy 0, policy_version 298972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:51,273][626795] Updated weights for policy 0, policy_version 298982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:53,172][626795] Updated weights for policy 0, policy_version 298992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:53,976][24592] Fps is (10 sec: 39320.6, 60 sec: 37819.9, 300 sec: 36405.8). Total num frames: 2449367040. Throughput: 0: 9672.0. Samples: 362339970. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:53,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:55,577][626795] Updated weights for policy 0, policy_version 299002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:57,409][626795] Updated weights for policy 0, policy_version 299012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:58,975][24592] Fps is (10 sec: 39322.0, 60 sec: 38229.4, 300 sec: 36405.8). Total num frames: 2449563648. Throughput: 0: 9681.3. Samples: 362368470. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:50:58,976][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:50:59,676][626795] Updated weights for policy 0, policy_version 299022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:01,960][626795] Updated weights for policy 0, policy_version 299032 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:03,976][24592] Fps is (10 sec: 37683.2, 60 sec: 38229.1, 300 sec: 36405.8). Total num frames: 2449743872. Throughput: 0: 9650.5. Samples: 362425530. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:03,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000299042_2449752064.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:03,993][626795] Updated weights for policy 0, policy_version 299042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:04,078][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000297990_2441134080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:06,202][626795] Updated weights for policy 0, policy_version 299052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:08,492][626795] Updated weights for policy 0, policy_version 299062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:08,976][24592] Fps is (10 sec: 36042.6, 60 sec: 38366.1, 300 sec: 36322.5). Total num frames: 2449924096. Throughput: 0: 9651.4. Samples: 362481636. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:08,978][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:10,655][626795] Updated weights for policy 0, policy_version 299072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:12,752][626795] Updated weights for policy 0, policy_version 299082 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:13,975][24592] Fps is (10 sec: 37684.4, 60 sec: 38502.6, 300 sec: 36378.0). Total num frames: 2450120704. Throughput: 0: 9629.0. Samples: 362510112. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:13,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:14,881][626795] Updated weights for policy 0, policy_version 299092 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:16,984][626795] Updated weights for policy 0, policy_version 299102 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:18,976][24592] Fps is (10 sec: 39323.3, 60 sec: 38502.3, 300 sec: 36378.1). Total num frames: 2450317312. Throughput: 0: 9586.1. Samples: 362568132. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:18,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:19,151][626795] Updated weights for policy 0, policy_version 299112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:21,233][626795] Updated weights for policy 0, policy_version 299122 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,186][626772] Signal inference workers to stop experience collection... (4700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,189][626772] Signal inference workers to resume experience collection... (4700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,203][626795] InferenceWorker_p0-w0: stopping experience collection (4700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,211][626795] InferenceWorker_p0-w0: resuming experience collection (4700 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:23,233][626795] Updated weights for policy 0, policy_version 299132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,975][24592] Fps is (10 sec: 38502.2, 60 sec: 38502.5, 300 sec: 36378.1). Total num frames: 2450505728. Throughput: 0: 9564.0. Samples: 362626452. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:23,977][24592] Avg episode reward: [(0, '4.860')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:25,463][626795] Updated weights for policy 0, policy_version 299142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:27,593][626795] Updated weights for policy 0, policy_version 299152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:28,975][24592] Fps is (10 sec: 39322.3, 60 sec: 38502.6, 300 sec: 36405.8). Total num frames: 2450710528. Throughput: 0: 9592.8. Samples: 362655234. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:28,976][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:29,653][626795] Updated weights for policy 0, policy_version 299162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:31,662][626795] Updated weights for policy 0, policy_version 299172 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:33,689][626795] Updated weights for policy 0, policy_version 299182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:33,976][24592] Fps is (10 sec: 40138.5, 60 sec: 38502.1, 300 sec: 36405.7). Total num frames: 2450907136. Throughput: 0: 9613.1. Samples: 362714766. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:33,978][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:35,702][626795] Updated weights for policy 0, policy_version 299192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:37,756][626795] Updated weights for policy 0, policy_version 299202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:38,975][24592] Fps is (10 sec: 39321.6, 60 sec: 38502.8, 300 sec: 36433.6). Total num frames: 2451103744. Throughput: 0: 9665.0. Samples: 362774892. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:38,978][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:39,846][626795] Updated weights for policy 0, policy_version 299212 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:41,930][626795] Updated weights for policy 0, policy_version 299222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:43,975][24592] Fps is (10 sec: 39324.0, 60 sec: 38775.5, 300 sec: 36433.6). Total num frames: 2451300352. Throughput: 0: 9688.1. Samples: 362804436. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:43,977][24592] Avg episode reward: [(0, '4.507')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:44,103][626795] Updated weights for policy 0, policy_version 299232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:46,614][626795] Updated weights for policy 0, policy_version 299242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:48,611][626795] Updated weights for policy 0, policy_version 299252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:48,975][24592] Fps is (10 sec: 36864.0, 60 sec: 38365.9, 300 sec: 36378.1). Total num frames: 2451472384. Throughput: 0: 9630.5. Samples: 362858898. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:48,976][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:50,832][626795] Updated weights for policy 0, policy_version 299262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:52,857][626795] Updated weights for policy 0, policy_version 299272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:53,976][24592] Fps is (10 sec: 36863.1, 60 sec: 38365.9, 300 sec: 36378.0). Total num frames: 2451668992. Throughput: 0: 9678.3. Samples: 362917158. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:53,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:55,043][626795] Updated weights for policy 0, policy_version 299282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:57,163][626795] Updated weights for policy 0, policy_version 299292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:58,977][24592] Fps is (10 sec: 40133.7, 60 sec: 38501.3, 300 sec: 36405.6). Total num frames: 2451873792. Throughput: 0: 9677.6. Samples: 362945622. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:51:58,979][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:51:59,279][626795] Updated weights for policy 0, policy_version 299302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:01,430][626795] Updated weights for policy 0, policy_version 299312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:03,516][626795] Updated weights for policy 0, policy_version 299322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:03,975][24592] Fps is (10 sec: 39322.7, 60 sec: 38639.1, 300 sec: 36378.0). Total num frames: 2452062208. Throughput: 0: 9678.4. Samples: 363003660. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:03,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:05,639][626795] Updated weights for policy 0, policy_version 299332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:07,675][626795] Updated weights for policy 0, policy_version 299342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:08,977][24592] Fps is (10 sec: 37685.0, 60 sec: 38775.0, 300 sec: 36350.1). Total num frames: 2452250624. Throughput: 0: 9692.5. Samples: 363062628. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:08,978][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:09,879][626795] Updated weights for policy 0, policy_version 299352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:11,879][626795] Updated weights for policy 0, policy_version 299362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:13,842][626795] Updated weights for policy 0, policy_version 299372 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:13,975][24592] Fps is (10 sec: 39321.5, 60 sec: 38912.0, 300 sec: 36433.6). Total num frames: 2452455424. Throughput: 0: 9698.9. Samples: 363091686. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:13,977][24592] Avg episode reward: [(0, '5.057')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:16,099][626795] Updated weights for policy 0, policy_version 299382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:18,529][626795] Updated weights for policy 0, policy_version 299392 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:18,976][24592] Fps is (10 sec: 37686.1, 60 sec: 38502.2, 300 sec: 36378.0). Total num frames: 2452627456. Throughput: 0: 9633.0. Samples: 363148248. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:18,979][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:20,675][626795] Updated weights for policy 0, policy_version 299402 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:22,902][626795] Updated weights for policy 0, policy_version 299412 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:23,975][24592] Fps is (10 sec: 36864.1, 60 sec: 38639.0, 300 sec: 36433.6). Total num frames: 2452824064. Throughput: 0: 9545.9. Samples: 363204456. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:23,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:25,054][626795] Updated weights for policy 0, policy_version 299422 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:27,035][626795] Updated weights for policy 0, policy_version 299432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:28,976][24592] Fps is (10 sec: 39319.7, 60 sec: 38501.8, 300 sec: 36461.2). Total num frames: 2453020672. Throughput: 0: 9530.1. Samples: 363233298. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:28,978][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:29,201][626795] Updated weights for policy 0, policy_version 299442 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:31,327][626795] Updated weights for policy 0, policy_version 299452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:33,251][626795] Updated weights for policy 0, policy_version 299462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:33,975][24592] Fps is (10 sec: 39321.6, 60 sec: 38502.8, 300 sec: 36489.1). Total num frames: 2453217280. Throughput: 0: 9637.2. Samples: 363292572. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:33,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:35,342][626795] Updated weights for policy 0, policy_version 299472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:37,580][626795] Updated weights for policy 0, policy_version 299482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:38,975][24592] Fps is (10 sec: 39325.4, 60 sec: 38502.4, 300 sec: 36628.0). Total num frames: 2453413888. Throughput: 0: 9648.9. Samples: 363351354. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:38,976][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:39,557][626795] Updated weights for policy 0, policy_version 299492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:41,693][626795] Updated weights for policy 0, policy_version 299502 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:43,662][626795] Updated weights for policy 0, policy_version 299512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:43,976][24592] Fps is (10 sec: 39320.7, 60 sec: 38502.3, 300 sec: 36711.5). Total num frames: 2453610496. Throughput: 0: 9670.7. Samples: 363380790. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:43,977][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:45,838][626795] Updated weights for policy 0, policy_version 299522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:47,895][626795] Updated weights for policy 0, policy_version 299532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:48,976][24592] Fps is (10 sec: 39318.8, 60 sec: 38911.5, 300 sec: 36739.0). Total num frames: 2453807104. Throughput: 0: 9695.8. Samples: 363439980. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:48,978][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:50,085][626795] Updated weights for policy 0, policy_version 299542 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:52,245][626795] Updated weights for policy 0, policy_version 299552 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:53,983][24592] Fps is (10 sec: 38475.1, 60 sec: 38770.9, 300 sec: 36710.4). Total num frames: 2453995520. Throughput: 0: 9670.2. Samples: 363497844. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:53,985][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:54,415][626795] Updated weights for policy 0, policy_version 299562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:56,509][626795] Updated weights for policy 0, policy_version 299572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:52:58,483][626795] Updated weights for policy 0, policy_version 299582 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:58,975][24592] Fps is (10 sec: 37685.9, 60 sec: 38503.5, 300 sec: 36739.0). Total num frames: 2454183936. Throughput: 0: 9653.3. Samples: 363526086. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:52:58,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:00,696][626795] Updated weights for policy 0, policy_version 299592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:02,793][626795] Updated weights for policy 0, policy_version 299602 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:03,975][24592] Fps is (10 sec: 38530.3, 60 sec: 38638.9, 300 sec: 36794.6). Total num frames: 2454380544. Throughput: 0: 9681.2. Samples: 363583896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:03,976][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000299607_2454380544.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:04,081][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000298528_2445541376.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:05,118][626795] Updated weights for policy 0, policy_version 299612 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:07,323][626795] Updated weights for policy 0, policy_version 299622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:08,977][24592] Fps is (10 sec: 36859.3, 60 sec: 38365.9, 300 sec: 36766.6). Total num frames: 2454552576. Throughput: 0: 9617.1. Samples: 363637236. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:08,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:09,804][626795] Updated weights for policy 0, policy_version 299632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:11,823][626795] Updated weights for policy 0, policy_version 299642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:13,950][626795] Updated weights for policy 0, policy_version 299652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:13,975][24592] Fps is (10 sec: 36863.9, 60 sec: 38229.3, 300 sec: 36739.0). Total num frames: 2454749184. Throughput: 0: 9610.9. Samples: 363665778. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:13,977][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:16,070][626795] Updated weights for policy 0, policy_version 299662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:18,067][626795] Updated weights for policy 0, policy_version 299672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:18,975][24592] Fps is (10 sec: 39326.7, 60 sec: 38639.3, 300 sec: 36711.3). Total num frames: 2454945792. Throughput: 0: 9592.3. Samples: 363724224. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:18,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:20,243][626795] Updated weights for policy 0, policy_version 299682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:22,426][626795] Updated weights for policy 0, policy_version 299692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:23,976][24592] Fps is (10 sec: 36863.7, 60 sec: 38229.2, 300 sec: 36627.9). Total num frames: 2455117824. Throughput: 0: 9497.0. Samples: 363778722. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:23,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:24,945][626795] Updated weights for policy 0, policy_version 299702 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:27,086][626795] Updated weights for policy 0, policy_version 299712 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:28,976][24592] Fps is (10 sec: 36043.5, 60 sec: 38093.2, 300 sec: 36600.2). Total num frames: 2455306240. Throughput: 0: 9472.9. Samples: 363807072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:28,979][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:29,247][626795] Updated weights for policy 0, policy_version 299722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:31,549][626795] Updated weights for policy 0, policy_version 299732 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:33,679][626795] Updated weights for policy 0, policy_version 299742 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:33,975][24592] Fps is (10 sec: 38502.9, 60 sec: 38092.8, 300 sec: 36628.0). Total num frames: 2455502848. Throughput: 0: 9384.0. Samples: 363862254. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:33,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:35,762][626795] Updated weights for policy 0, policy_version 299752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:37,769][626795] Updated weights for policy 0, policy_version 299762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:38,976][24592] Fps is (10 sec: 38502.3, 60 sec: 37956.1, 300 sec: 36711.2). Total num frames: 2455691264. Throughput: 0: 9415.1. Samples: 363921456. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:38,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:39,913][626795] Updated weights for policy 0, policy_version 299772 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:42,005][626795] Updated weights for policy 0, policy_version 299782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:43,977][24592] Fps is (10 sec: 38496.7, 60 sec: 37955.4, 300 sec: 36766.6). Total num frames: 2455887872. Throughput: 0: 9453.4. Samples: 363951504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:43,978][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:44,267][626795] Updated weights for policy 0, policy_version 299792 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:46,839][626795] Updated weights for policy 0, policy_version 299802 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:48,975][24592] Fps is (10 sec: 34407.6, 60 sec: 37137.5, 300 sec: 36739.1). Total num frames: 2456035328. Throughput: 0: 9251.5. Samples: 364000212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:48,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:49,734][626795] Updated weights for policy 0, policy_version 299812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:52,479][626795] Updated weights for policy 0, policy_version 299822 (0.0032)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:53,976][24592] Fps is (10 sec: 30314.2, 60 sec: 36595.2, 300 sec: 36683.5). Total num frames: 2456190976. Throughput: 0: 9115.0. Samples: 364047402. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:53,979][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:54,769][626795] Updated weights for policy 0, policy_version 299832 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:57,058][626795] Updated weights for policy 0, policy_version 299842 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:58,976][24592] Fps is (10 sec: 33585.6, 60 sec: 36454.1, 300 sec: 36739.0). Total num frames: 2456371200. Throughput: 0: 9050.5. Samples: 364073052. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:53:58,981][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:53:59,391][626795] Updated weights for policy 0, policy_version 299852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:02,281][626795] Updated weights for policy 0, policy_version 299862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:03,976][24592] Fps is (10 sec: 32768.3, 60 sec: 35635.1, 300 sec: 36572.4). Total num frames: 2456518656. Throughput: 0: 8814.6. Samples: 364120884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:03,976][24592] Avg episode reward: [(0, '4.430')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:04,783][626795] Updated weights for policy 0, policy_version 299872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:06,844][626795] Updated weights for policy 0, policy_version 299882 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:08,861][626795] Updated weights for policy 0, policy_version 299892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:08,976][24592] Fps is (10 sec: 34406.5, 60 sec: 36045.3, 300 sec: 36711.2). Total num frames: 2456715264. Throughput: 0: 8847.0. Samples: 364176840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:08,976][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:10,972][626795] Updated weights for policy 0, policy_version 299902 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:12,963][626795] Updated weights for policy 0, policy_version 299912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:13,975][24592] Fps is (10 sec: 39322.3, 60 sec: 36044.9, 300 sec: 36850.3). Total num frames: 2456911872. Throughput: 0: 8884.6. Samples: 364206876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:13,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:15,063][626795] Updated weights for policy 0, policy_version 299922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:17,090][626795] Updated weights for policy 0, policy_version 299932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:18,976][24592] Fps is (10 sec: 40140.3, 60 sec: 36181.0, 300 sec: 37044.4). Total num frames: 2457116672. Throughput: 0: 9004.2. Samples: 364267446. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:18,978][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:19,115][626795] Updated weights for policy 0, policy_version 299942 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:21,130][626795] Updated weights for policy 0, policy_version 299952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:23,179][626795] Updated weights for policy 0, policy_version 299962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:23,976][24592] Fps is (10 sec: 40138.7, 60 sec: 36590.7, 300 sec: 37183.3). Total num frames: 2457313280. Throughput: 0: 9027.3. Samples: 364327686. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:23,977][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:25,356][626795] Updated weights for policy 0, policy_version 299972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:27,372][626795] Updated weights for policy 0, policy_version 299982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:28,975][24592] Fps is (10 sec: 40142.6, 60 sec: 36864.1, 300 sec: 37350.0). Total num frames: 2457518080. Throughput: 0: 9007.6. Samples: 364356834. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:28,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:29,527][626795] Updated weights for policy 0, policy_version 299992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:31,496][626795] Updated weights for policy 0, policy_version 300002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:33,584][626795] Updated weights for policy 0, policy_version 300012 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:33,975][24592] Fps is (10 sec: 40142.5, 60 sec: 36864.0, 300 sec: 37488.8). Total num frames: 2457714688. Throughput: 0: 9243.7. Samples: 364416180. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:33,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:35,588][626795] Updated weights for policy 0, policy_version 300022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:37,687][626795] Updated weights for policy 0, policy_version 300032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:38,975][24592] Fps is (10 sec: 39321.8, 60 sec: 37000.7, 300 sec: 37627.7). Total num frames: 2457911296. Throughput: 0: 9524.8. Samples: 364476018. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:38,978][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:39,654][626795] Updated weights for policy 0, policy_version 300042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:41,735][626795] Updated weights for policy 0, policy_version 300052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:43,803][626795] Updated weights for policy 0, policy_version 300062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:43,975][24592] Fps is (10 sec: 39322.0, 60 sec: 37001.5, 300 sec: 37822.1). Total num frames: 2458107904. Throughput: 0: 9623.2. Samples: 364506090. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:43,976][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:45,799][626795] Updated weights for policy 0, policy_version 300072 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:47,781][626795] Updated weights for policy 0, policy_version 300082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:48,976][24592] Fps is (10 sec: 40140.3, 60 sec: 37956.1, 300 sec: 38016.5). Total num frames: 2458312704. Throughput: 0: 9917.7. Samples: 364567182. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:48,978][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:49,838][626795] Updated weights for policy 0, policy_version 300092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:51,874][626795] Updated weights for policy 0, policy_version 300102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:53,882][626795] Updated weights for policy 0, policy_version 300112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:53,975][24592] Fps is (10 sec: 40959.8, 60 sec: 38775.6, 300 sec: 38127.5). Total num frames: 2458517504. Throughput: 0: 10022.5. Samples: 364627848. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:53,978][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:55,940][626795] Updated weights for policy 0, policy_version 300122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:54:58,073][626795] Updated weights for policy 0, policy_version 300132 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:58,975][24592] Fps is (10 sec: 40141.1, 60 sec: 39048.8, 300 sec: 38183.0). Total num frames: 2458714112. Throughput: 0: 10011.2. Samples: 364657380. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:54:58,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:00,130][626795] Updated weights for policy 0, policy_version 300142 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:02,224][626795] Updated weights for policy 0, policy_version 300152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:03,975][24592] Fps is (10 sec: 39321.3, 60 sec: 39867.8, 300 sec: 38266.5). Total num frames: 2458910720. Throughput: 0: 9972.1. Samples: 364716186. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:03,977][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000300160_2458910720.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000299042_2449752064.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:04,439][626795] Updated weights for policy 0, policy_version 300162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:06,514][626795] Updated weights for policy 0, policy_version 300172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:08,570][626795] Updated weights for policy 0, policy_version 300182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:08,976][24592] Fps is (10 sec: 39321.5, 60 sec: 39867.9, 300 sec: 38294.1). Total num frames: 2459107328. Throughput: 0: 9923.8. Samples: 364774254. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:08,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:10,640][626795] Updated weights for policy 0, policy_version 300192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:12,596][626795] Updated weights for policy 0, policy_version 300202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:13,975][24592] Fps is (10 sec: 39321.9, 60 sec: 39867.7, 300 sec: 38294.1). Total num frames: 2459303936. Throughput: 0: 9941.6. Samples: 364804206. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:13,979][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:14,680][626795] Updated weights for policy 0, policy_version 300212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:16,704][626795] Updated weights for policy 0, policy_version 300222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:18,736][626795] Updated weights for policy 0, policy_version 300232 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:18,984][24592] Fps is (10 sec: 40108.0, 60 sec: 39862.6, 300 sec: 38348.6). Total num frames: 2459508736. Throughput: 0: 9971.9. Samples: 364864998. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:18,985][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:20,775][626795] Updated weights for policy 0, policy_version 300242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:22,790][626795] Updated weights for policy 0, policy_version 300252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:23,976][24592] Fps is (10 sec: 40139.8, 60 sec: 39867.9, 300 sec: 38321.9). Total num frames: 2459705344. Throughput: 0: 10003.2. Samples: 364926162. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:23,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:24,890][626795] Updated weights for policy 0, policy_version 300262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:26,793][626795] Updated weights for policy 0, policy_version 300272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:28,781][626795] Updated weights for policy 0, policy_version 300282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:28,975][24592] Fps is (10 sec: 40174.0, 60 sec: 39867.8, 300 sec: 38349.7). Total num frames: 2459910144. Throughput: 0: 9995.2. Samples: 364955874. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:28,976][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:30,961][626795] Updated weights for policy 0, policy_version 300292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:33,064][626795] Updated weights for policy 0, policy_version 300302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:33,976][24592] Fps is (10 sec: 40140.8, 60 sec: 39867.6, 300 sec: 38349.7). Total num frames: 2460106752. Throughput: 0: 9952.1. Samples: 365015028. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:33,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:35,234][626795] Updated weights for policy 0, policy_version 300312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:37,222][626795] Updated weights for policy 0, policy_version 300322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:38,975][24592] Fps is (10 sec: 39321.5, 60 sec: 39867.7, 300 sec: 38405.2). Total num frames: 2460303360. Throughput: 0: 9930.7. Samples: 365074728. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:38,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:39,256][626795] Updated weights for policy 0, policy_version 300332 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:41,314][626795] Updated weights for policy 0, policy_version 300342 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:43,321][626795] Updated weights for policy 0, policy_version 300352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:43,975][24592] Fps is (10 sec: 39322.7, 60 sec: 39867.7, 300 sec: 38405.2). Total num frames: 2460499968. Throughput: 0: 9934.2. Samples: 365104416. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:43,977][24592] Avg episode reward: [(0, '5.010')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:45,406][626795] Updated weights for policy 0, policy_version 300362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:47,399][626795] Updated weights for policy 0, policy_version 300372 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:48,976][24592] Fps is (10 sec: 40139.5, 60 sec: 39867.6, 300 sec: 38433.0). Total num frames: 2460704768. Throughput: 0: 9986.2. Samples: 365165568. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:48,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:49,478][626795] Updated weights for policy 0, policy_version 300382 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:51,572][626795] Updated weights for policy 0, policy_version 300392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:53,476][626795] Updated weights for policy 0, policy_version 300402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:53,976][24592] Fps is (10 sec: 40958.2, 60 sec: 39867.5, 300 sec: 38460.7). Total num frames: 2460909568. Throughput: 0: 10027.5. Samples: 365225496. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:53,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:55,622][626795] Updated weights for policy 0, policy_version 300412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:57,547][626795] Updated weights for policy 0, policy_version 300422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:58,976][24592] Fps is (10 sec: 40960.8, 60 sec: 40004.2, 300 sec: 38544.1). Total num frames: 2461114368. Throughput: 0: 10010.8. Samples: 365254692. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:55:58,977][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:55:59,577][626795] Updated weights for policy 0, policy_version 300432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:01,751][626795] Updated weights for policy 0, policy_version 300442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:03,874][626795] Updated weights for policy 0, policy_version 300452 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:03,975][24592] Fps is (10 sec: 39322.9, 60 sec: 39867.7, 300 sec: 38571.9). Total num frames: 2461302784. Throughput: 0: 10000.6. Samples: 365314944. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:03,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:05,963][626795] Updated weights for policy 0, policy_version 300462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:08,044][626795] Updated weights for policy 0, policy_version 300472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:08,975][24592] Fps is (10 sec: 38503.1, 60 sec: 39867.8, 300 sec: 38571.8). Total num frames: 2461499392. Throughput: 0: 9940.2. Samples: 365373468. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:08,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:10,125][626795] Updated weights for policy 0, policy_version 300482 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:12,130][626795] Updated weights for policy 0, policy_version 300492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:13,976][24592] Fps is (10 sec: 40140.1, 60 sec: 40004.1, 300 sec: 38599.6). Total num frames: 2461704192. Throughput: 0: 9944.4. Samples: 365403372. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:13,978][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:14,135][626795] Updated weights for policy 0, policy_version 300502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:16,162][626795] Updated weights for policy 0, policy_version 300512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:18,184][626795] Updated weights for policy 0, policy_version 300522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:18,975][24592] Fps is (10 sec: 40140.7, 60 sec: 39873.2, 300 sec: 38627.4). Total num frames: 2461900800. Throughput: 0: 9992.7. Samples: 365464698. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:18,977][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:20,139][626795] Updated weights for policy 0, policy_version 300532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:22,182][626795] Updated weights for policy 0, policy_version 300542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:23,975][24592] Fps is (10 sec: 40961.0, 60 sec: 40141.0, 300 sec: 38655.1). Total num frames: 2462113792. Throughput: 0: 10031.6. Samples: 365526150. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:23,977][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:24,236][626795] Updated weights for policy 0, policy_version 300552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:26,231][626795] Updated weights for policy 0, policy_version 300562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:28,245][626795] Updated weights for policy 0, policy_version 300572 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:28,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40004.3, 300 sec: 38655.2). Total num frames: 2462310400. Throughput: 0: 10033.2. Samples: 365555910. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:28,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:30,226][626795] Updated weights for policy 0, policy_version 300582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:32,170][626795] Updated weights for policy 0, policy_version 300592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:33,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40141.0, 300 sec: 38682.9). Total num frames: 2462515200. Throughput: 0: 10042.8. Samples: 365617488. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:33,977][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:34,293][626795] Updated weights for policy 0, policy_version 300602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:36,459][626795] Updated weights for policy 0, policy_version 300612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:38,396][626795] Updated weights for policy 0, policy_version 300622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:38,983][24592] Fps is (10 sec: 40111.3, 60 sec: 40136.0, 300 sec: 38681.9). Total num frames: 2462711808. Throughput: 0: 10032.5. Samples: 365677026. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:38,984][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:40,549][626795] Updated weights for policy 0, policy_version 300632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:42,475][626795] Updated weights for policy 0, policy_version 300642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:43,975][24592] Fps is (10 sec: 39321.4, 60 sec: 40140.8, 300 sec: 38766.2). Total num frames: 2462908416. Throughput: 0: 10039.4. Samples: 365706462. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:43,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:44,671][626795] Updated weights for policy 0, policy_version 300652 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:46,570][626795] Updated weights for policy 0, policy_version 300662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:48,671][626795] Updated weights for policy 0, policy_version 300672 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:48,976][24592] Fps is (10 sec: 40988.2, 60 sec: 40277.3, 300 sec: 38821.7). Total num frames: 2463121408. Throughput: 0: 10049.0. Samples: 365767152. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:48,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:50,600][626795] Updated weights for policy 0, policy_version 300682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:52,628][626795] Updated weights for policy 0, policy_version 300692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:53,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40141.1, 300 sec: 38794.2). Total num frames: 2463318016. Throughput: 0: 10101.6. Samples: 365828040. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:53,976][24592] Avg episode reward: [(0, '4.935')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:54,733][626795] Updated weights for policy 0, policy_version 300702 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:56,835][626795] Updated weights for policy 0, policy_version 300712 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:56:58,733][626795] Updated weights for policy 0, policy_version 300722 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:58,975][24592] Fps is (10 sec: 40142.5, 60 sec: 40140.9, 300 sec: 38849.5). Total num frames: 2463522816. Throughput: 0: 10108.7. Samples: 365858262. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:56:58,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:00,701][626795] Updated weights for policy 0, policy_version 300732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:02,777][626795] Updated weights for policy 0, policy_version 300742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:03,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40413.9, 300 sec: 38905.2). Total num frames: 2463727616. Throughput: 0: 10123.3. Samples: 365920248. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:03,977][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000300748_2463727616.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:04,152][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000299607_2454380544.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:04,803][626795] Updated weights for policy 0, policy_version 300752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:06,750][626795] Updated weights for policy 0, policy_version 300762 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:08,976][24592] Fps is (10 sec: 39321.1, 60 sec: 40277.2, 300 sec: 38849.5). Total num frames: 2463916032. Throughput: 0: 10069.7. Samples: 365979288. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:08,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:08,988][626795] Updated weights for policy 0, policy_version 300772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:10,965][626795] Updated weights for policy 0, policy_version 300782 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:13,017][626795] Updated weights for policy 0, policy_version 300792 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:13,975][24592] Fps is (10 sec: 39321.7, 60 sec: 40277.5, 300 sec: 38960.7). Total num frames: 2464120832. Throughput: 0: 10072.5. Samples: 366009174. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:13,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:14,992][626795] Updated weights for policy 0, policy_version 300802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:17,127][626795] Updated weights for policy 0, policy_version 300812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:18,976][24592] Fps is (10 sec: 40960.1, 60 sec: 40413.8, 300 sec: 38988.3). Total num frames: 2464325632. Throughput: 0: 10049.8. Samples: 366069732. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:18,977][24592] Avg episode reward: [(0, '4.599')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:19,052][626795] Updated weights for policy 0, policy_version 300822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:21,087][626795] Updated weights for policy 0, policy_version 300832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:23,026][626795] Updated weights for policy 0, policy_version 300842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:23,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40277.3, 300 sec: 39016.3). Total num frames: 2464530432. Throughput: 0: 10087.5. Samples: 366130890. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:23,976][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:25,159][626795] Updated weights for policy 0, policy_version 300852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:27,135][626795] Updated weights for policy 0, policy_version 300862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:28,975][24592] Fps is (10 sec: 40960.7, 60 sec: 40413.9, 300 sec: 39043.9). Total num frames: 2464735232. Throughput: 0: 10105.3. Samples: 366161202. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:28,979][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:29,061][626795] Updated weights for policy 0, policy_version 300872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:31,107][626795] Updated weights for policy 0, policy_version 300882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:33,119][626795] Updated weights for policy 0, policy_version 300892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40413.8, 300 sec: 39071.7). Total num frames: 2464940032. Throughput: 0: 10124.6. Samples: 366222756. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:33,976][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:35,087][626795] Updated weights for policy 0, policy_version 300902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:37,077][626795] Updated weights for policy 0, policy_version 300912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,345][626772] Signal inference workers to stop experience collection... (4750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,346][626772] Signal inference workers to resume experience collection... (4750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,360][626795] InferenceWorker_p0-w0: stopping experience collection (4750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,361][626795] InferenceWorker_p0-w0: resuming experience collection (4750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40418.8, 300 sec: 39071.7). Total num frames: 2465136640. Throughput: 0: 10117.7. Samples: 366283338. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:38,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:39,216][626795] Updated weights for policy 0, policy_version 300922 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:41,423][626795] Updated weights for policy 0, policy_version 300932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:43,266][626795] Updated weights for policy 0, policy_version 300942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:43,976][24592] Fps is (10 sec: 40138.8, 60 sec: 40550.1, 300 sec: 39099.5). Total num frames: 2465341440. Throughput: 0: 10108.2. Samples: 366313134. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:43,980][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:45,424][626795] Updated weights for policy 0, policy_version 300952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:47,412][626795] Updated weights for policy 0, policy_version 300962 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:48,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40277.6, 300 sec: 39128.2). Total num frames: 2465538048. Throughput: 0: 10080.5. Samples: 366373872. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:48,977][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:49,421][626795] Updated weights for policy 0, policy_version 300972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:51,444][626795] Updated weights for policy 0, policy_version 300982 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:53,447][626795] Updated weights for policy 0, policy_version 300992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:53,975][24592] Fps is (10 sec: 40142.9, 60 sec: 40413.9, 300 sec: 39182.8). Total num frames: 2465742848. Throughput: 0: 10118.8. Samples: 366434634. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:53,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:55,534][626795] Updated weights for policy 0, policy_version 301002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:57,479][626795] Updated weights for policy 0, policy_version 301012 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40413.9, 300 sec: 39210.5). Total num frames: 2465947648. Throughput: 0: 10122.8. Samples: 366464700. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:57:58,976][24592] Avg episode reward: [(0, '4.370')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:57:59,512][626795] Updated weights for policy 0, policy_version 301022 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:01,468][626795] Updated weights for policy 0, policy_version 301032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:03,449][626795] Updated weights for policy 0, policy_version 301042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:03,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40413.9, 300 sec: 39321.8). Total num frames: 2466152448. Throughput: 0: 10142.0. Samples: 366526122. Policy #0 lag: (min: 0.0, avg: 1.6, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:03,976][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:05,462][626795] Updated weights for policy 0, policy_version 301052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:07,580][626795] Updated weights for policy 0, policy_version 301062 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:08,976][24592] Fps is (10 sec: 40137.9, 60 sec: 40550.0, 300 sec: 39321.5). Total num frames: 2466349056. Throughput: 0: 10121.4. Samples: 366586362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:08,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:09,536][626795] Updated weights for policy 0, policy_version 301072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:11,654][626795] Updated weights for policy 0, policy_version 301082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:13,851][626795] Updated weights for policy 0, policy_version 301092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:13,975][24592] Fps is (10 sec: 40140.5, 60 sec: 40550.4, 300 sec: 39349.4). Total num frames: 2466553856. Throughput: 0: 10111.3. Samples: 366616212. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:13,977][24592] Avg episode reward: [(0, '4.769')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:15,866][626795] Updated weights for policy 0, policy_version 301102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:17,788][626795] Updated weights for policy 0, policy_version 301112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:18,975][24592] Fps is (10 sec: 40143.9, 60 sec: 40414.0, 300 sec: 39432.7). Total num frames: 2466750464. Throughput: 0: 10083.4. Samples: 366676506. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:18,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:19,930][626795] Updated weights for policy 0, policy_version 301122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:21,860][626795] Updated weights for policy 0, policy_version 301132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:23,885][626795] Updated weights for policy 0, policy_version 301142 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:23,975][24592] Fps is (10 sec: 40140.8, 60 sec: 40413.8, 300 sec: 39488.3). Total num frames: 2466955264. Throughput: 0: 10085.2. Samples: 366737172. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:23,976][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:25,987][626795] Updated weights for policy 0, policy_version 301152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:28,030][626795] Updated weights for policy 0, policy_version 301162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:28,978][24592] Fps is (10 sec: 40948.6, 60 sec: 40412.0, 300 sec: 39515.6). Total num frames: 2467160064. Throughput: 0: 10099.4. Samples: 366767628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:28,979][24592] Avg episode reward: [(0, '5.013')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:29,963][626795] Updated weights for policy 0, policy_version 301172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:31,948][626795] Updated weights for policy 0, policy_version 301182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:33,933][626795] Updated weights for policy 0, policy_version 301192 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:33,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40413.9, 300 sec: 39571.6). Total num frames: 2467364864. Throughput: 0: 10104.9. Samples: 366828594. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:33,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:35,966][626795] Updated weights for policy 0, policy_version 301202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:37,968][626795] Updated weights for policy 0, policy_version 301212 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:38,975][24592] Fps is (10 sec: 40151.7, 60 sec: 40413.9, 300 sec: 39571.7). Total num frames: 2467561472. Throughput: 0: 10119.3. Samples: 366890004. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:38,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:40,007][626795] Updated weights for policy 0, policy_version 301222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:41,868][626795] Updated weights for policy 0, policy_version 301232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:43,976][24592] Fps is (10 sec: 40140.4, 60 sec: 40414.1, 300 sec: 39765.9). Total num frames: 2467766272. Throughput: 0: 10142.5. Samples: 366921114. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:43,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:44,007][626795] Updated weights for policy 0, policy_version 301242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:46,608][626795] Updated weights for policy 0, policy_version 301252 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:48,975][24592] Fps is (10 sec: 36044.6, 60 sec: 39731.2, 300 sec: 39765.9). Total num frames: 2467921920. Throughput: 0: 9947.6. Samples: 366973764. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:48,977][24592] Avg episode reward: [(0, '4.229')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:49,422][626795] Updated weights for policy 0, policy_version 301262 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:51,404][626795] Updated weights for policy 0, policy_version 301272 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:53,383][626795] Updated weights for policy 0, policy_version 301282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:53,976][24592] Fps is (10 sec: 35225.7, 60 sec: 39594.6, 300 sec: 39821.5). Total num frames: 2468118528. Throughput: 0: 9822.8. Samples: 367028382. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:53,978][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:55,542][626795] Updated weights for policy 0, policy_version 301292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:57,604][626795] Updated weights for policy 0, policy_version 301302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:58,975][24592] Fps is (10 sec: 39321.6, 60 sec: 39458.1, 300 sec: 39988.1). Total num frames: 2468315136. Throughput: 0: 9812.8. Samples: 367057788. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:58:58,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:58:59,623][626795] Updated weights for policy 0, policy_version 301312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:01,662][626795] Updated weights for policy 0, policy_version 301322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:03,648][626795] Updated weights for policy 0, policy_version 301332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:03,975][24592] Fps is (10 sec: 40960.3, 60 sec: 39594.6, 300 sec: 40043.7). Total num frames: 2468528128. Throughput: 0: 9828.2. Samples: 367118778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:03,978][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000301334_2468528128.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000300160_2458910720.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:05,691][626795] Updated weights for policy 0, policy_version 301342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:07,593][626795] Updated weights for policy 0, policy_version 301352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:08,975][24592] Fps is (10 sec: 40960.4, 60 sec: 39595.2, 300 sec: 40043.6). Total num frames: 2468724736. Throughput: 0: 9824.5. Samples: 367179276. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:08,976][24592] Avg episode reward: [(0, '4.427')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:09,772][626795] Updated weights for policy 0, policy_version 301362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:11,768][626795] Updated weights for policy 0, policy_version 301372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:13,738][626795] Updated weights for policy 0, policy_version 301382 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:13,975][24592] Fps is (10 sec: 40140.4, 60 sec: 39594.6, 300 sec: 40043.7). Total num frames: 2468929536. Throughput: 0: 9821.8. Samples: 367209582. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:13,976][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:15,810][626795] Updated weights for policy 0, policy_version 301392 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:17,936][626795] Updated weights for policy 0, policy_version 301402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:18,976][24592] Fps is (10 sec: 40140.1, 60 sec: 39594.5, 300 sec: 40043.6). Total num frames: 2469126144. Throughput: 0: 9806.5. Samples: 367269888. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:18,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:20,030][626795] Updated weights for policy 0, policy_version 301412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:22,026][626795] Updated weights for policy 0, policy_version 301422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:23,971][626795] Updated weights for policy 0, policy_version 301432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:23,975][24592] Fps is (10 sec: 40140.9, 60 sec: 39594.6, 300 sec: 40043.6). Total num frames: 2469330944. Throughput: 0: 9768.1. Samples: 367329570. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:23,980][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:26,083][626795] Updated weights for policy 0, policy_version 301442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:28,096][626795] Updated weights for policy 0, policy_version 301452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:28,975][24592] Fps is (10 sec: 40141.2, 60 sec: 39459.9, 300 sec: 40043.6). Total num frames: 2469527552. Throughput: 0: 9744.8. Samples: 367359630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:28,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:30,006][626795] Updated weights for policy 0, policy_version 301462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:32,108][626795] Updated weights for policy 0, policy_version 301472 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:33,975][24592] Fps is (10 sec: 40141.3, 60 sec: 39458.2, 300 sec: 40071.4). Total num frames: 2469732352. Throughput: 0: 9933.9. Samples: 367420788. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:33,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:34,210][626795] Updated weights for policy 0, policy_version 301482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:36,221][626795] Updated weights for policy 0, policy_version 301492 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:38,219][626795] Updated weights for policy 0, policy_version 301502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:38,976][24592] Fps is (10 sec: 40959.7, 60 sec: 39594.6, 300 sec: 40099.1). Total num frames: 2469937152. Throughput: 0: 10067.3. Samples: 367481412. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:38,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:40,206][626795] Updated weights for policy 0, policy_version 301512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:42,226][626795] Updated weights for policy 0, policy_version 301522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:43,975][24592] Fps is (10 sec: 40140.4, 60 sec: 39458.2, 300 sec: 40071.4). Total num frames: 2470133760. Throughput: 0: 10086.4. Samples: 367511676. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:43,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:44,266][626795] Updated weights for policy 0, policy_version 301532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:46,229][626795] Updated weights for policy 0, policy_version 301542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:48,363][626795] Updated weights for policy 0, policy_version 301552 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:48,975][24592] Fps is (10 sec: 40141.1, 60 sec: 40277.3, 300 sec: 40071.4). Total num frames: 2470338560. Throughput: 0: 10081.7. Samples: 367572456. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:48,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:50,398][626795] Updated weights for policy 0, policy_version 301562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:52,478][626795] Updated weights for policy 0, policy_version 301572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:53,976][24592] Fps is (10 sec: 40138.8, 60 sec: 40277.0, 300 sec: 40071.3). Total num frames: 2470535168. Throughput: 0: 10058.9. Samples: 367631934. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:53,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:54,577][626795] Updated weights for policy 0, policy_version 301582 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:56,599][626795] Updated weights for policy 0, policy_version 301592 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 17:59:58,678][626795] Updated weights for policy 0, policy_version 301602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:58,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40277.3, 300 sec: 40071.4). Total num frames: 2470731776. Throughput: 0: 10043.5. Samples: 367661538. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 17:59:58,977][24592] Avg episode reward: [(0, '4.426')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:00,728][626795] Updated weights for policy 0, policy_version 301612 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:02,680][626795] Updated weights for policy 0, policy_version 301622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:03,975][24592] Fps is (10 sec: 40142.9, 60 sec: 40140.8, 300 sec: 40099.2). Total num frames: 2470936576. Throughput: 0: 10059.4. Samples: 367722558. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:03,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:04,751][626795] Updated weights for policy 0, policy_version 301632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:06,657][626795] Updated weights for policy 0, policy_version 301642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:08,639][626795] Updated weights for policy 0, policy_version 301652 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40277.3, 300 sec: 40126.9). Total num frames: 2471141376. Throughput: 0: 10104.0. Samples: 367784250. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:08,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:10,666][626795] Updated weights for policy 0, policy_version 301662 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:12,650][626795] Updated weights for policy 0, policy_version 301672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40277.4, 300 sec: 40128.0). Total num frames: 2471346176. Throughput: 0: 10108.3. Samples: 367814502. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:13,977][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:14,704][626795] Updated weights for policy 0, policy_version 301682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:16,696][626795] Updated weights for policy 0, policy_version 301692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:18,731][626795] Updated weights for policy 0, policy_version 301702 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:18,977][24592] Fps is (10 sec: 40952.3, 60 sec: 40412.7, 300 sec: 40154.5). Total num frames: 2471550976. Throughput: 0: 10111.0. Samples: 367875804. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:18,979][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:20,809][626795] Updated weights for policy 0, policy_version 301712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:22,916][626795] Updated weights for policy 0, policy_version 301722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:23,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40277.4, 300 sec: 40126.9). Total num frames: 2471747584. Throughput: 0: 10073.4. Samples: 367934712. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:23,976][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:25,027][626795] Updated weights for policy 0, policy_version 301732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:26,988][626795] Updated weights for policy 0, policy_version 301742 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:28,976][24592] Fps is (10 sec: 39327.3, 60 sec: 40277.1, 300 sec: 40126.9). Total num frames: 2471944192. Throughput: 0: 10058.3. Samples: 367964304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:28,978][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:29,193][626795] Updated weights for policy 0, policy_version 301752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:31,205][626795] Updated weights for policy 0, policy_version 301762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:33,147][626795] Updated weights for policy 0, policy_version 301772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:33,977][24592] Fps is (10 sec: 39314.6, 60 sec: 40139.5, 300 sec: 40126.7). Total num frames: 2472140800. Throughput: 0: 10044.8. Samples: 368024490. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:33,978][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:35,186][626795] Updated weights for policy 0, policy_version 301782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:37,296][626795] Updated weights for policy 0, policy_version 301792 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:38,975][24592] Fps is (10 sec: 40142.5, 60 sec: 40140.9, 300 sec: 40154.7). Total num frames: 2472345600. Throughput: 0: 10065.6. Samples: 368084880. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:38,977][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:39,222][626795] Updated weights for policy 0, policy_version 301802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:41,281][626795] Updated weights for policy 0, policy_version 301812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:43,211][626795] Updated weights for policy 0, policy_version 301822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:43,975][24592] Fps is (10 sec: 40967.4, 60 sec: 40277.4, 300 sec: 40154.7). Total num frames: 2472550400. Throughput: 0: 10089.5. Samples: 368115564. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:43,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:45,203][626795] Updated weights for policy 0, policy_version 301832 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:47,339][626795] Updated weights for policy 0, policy_version 301842 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:48,975][24592] Fps is (10 sec: 40141.1, 60 sec: 40140.9, 300 sec: 40127.0). Total num frames: 2472747008. Throughput: 0: 10099.1. Samples: 368177016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:48,976][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:49,347][626795] Updated weights for policy 0, policy_version 301852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:51,469][626795] Updated weights for policy 0, policy_version 301862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:53,575][626795] Updated weights for policy 0, policy_version 301872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:53,976][24592] Fps is (10 sec: 39320.5, 60 sec: 40141.0, 300 sec: 40099.1). Total num frames: 2472943616. Throughput: 0: 10039.0. Samples: 368236008. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:53,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:55,700][626795] Updated weights for policy 0, policy_version 301882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:57,659][626795] Updated weights for policy 0, policy_version 301892 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:58,976][24592] Fps is (10 sec: 40138.9, 60 sec: 40277.1, 300 sec: 40154.6). Total num frames: 2473148416. Throughput: 0: 10007.0. Samples: 368264820. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:00:58,978][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:00:59,804][626795] Updated weights for policy 0, policy_version 301902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:01,802][626795] Updated weights for policy 0, policy_version 301912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:03,849][626795] Updated weights for policy 0, policy_version 301922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:03,975][24592] Fps is (10 sec: 40141.9, 60 sec: 40140.8, 300 sec: 40154.7). Total num frames: 2473345024. Throughput: 0: 9987.5. Samples: 368325222. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:03,978][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000301923_2473353216.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:04,090][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000300748_2463727616.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:05,785][626795] Updated weights for policy 0, policy_version 301932 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:07,808][626795] Updated weights for policy 0, policy_version 301942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:08,975][24592] Fps is (10 sec: 40142.7, 60 sec: 40140.8, 300 sec: 40154.7). Total num frames: 2473549824. Throughput: 0: 10032.7. Samples: 368386182. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:08,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:09,872][626795] Updated weights for policy 0, policy_version 301952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:11,839][626795] Updated weights for policy 0, policy_version 301962 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:13,925][626795] Updated weights for policy 0, policy_version 301972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:13,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40140.8, 300 sec: 40182.5). Total num frames: 2473754624. Throughput: 0: 10051.3. Samples: 368416608. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:13,976][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:15,844][626795] Updated weights for policy 0, policy_version 301982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:17,855][626795] Updated weights for policy 0, policy_version 301992 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:18,993][24592] Fps is (10 sec: 40889.2, 60 sec: 40130.5, 300 sec: 40152.3). Total num frames: 2473959424. Throughput: 0: 10083.1. Samples: 368478384. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:18,993][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:19,881][626795] Updated weights for policy 0, policy_version 302002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:21,789][626795] Updated weights for policy 0, policy_version 302012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:23,930][626795] Updated weights for policy 0, policy_version 302022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:23,976][24592] Fps is (10 sec: 40957.6, 60 sec: 40277.0, 300 sec: 40182.4). Total num frames: 2474164224. Throughput: 0: 10113.3. Samples: 368539986. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:23,978][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:25,961][626795] Updated weights for policy 0, policy_version 302032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:28,011][626795] Updated weights for policy 0, policy_version 302042 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:28,975][24592] Fps is (10 sec: 40210.2, 60 sec: 40277.6, 300 sec: 40154.7). Total num frames: 2474360832. Throughput: 0: 10073.3. Samples: 368568864. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:28,977][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:30,140][626795] Updated weights for policy 0, policy_version 302052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:32,112][626795] Updated weights for policy 0, policy_version 302062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:33,975][24592] Fps is (10 sec: 40143.1, 60 sec: 40415.1, 300 sec: 40183.5). Total num frames: 2474565632. Throughput: 0: 10053.6. Samples: 368629428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:33,976][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:34,076][626795] Updated weights for policy 0, policy_version 302072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:36,151][626795] Updated weights for policy 0, policy_version 302082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:38,172][626795] Updated weights for policy 0, policy_version 302092 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:38,976][24592] Fps is (10 sec: 40958.8, 60 sec: 40413.7, 300 sec: 40210.2). Total num frames: 2474770432. Throughput: 0: 10106.0. Samples: 368690778. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:38,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:40,125][626795] Updated weights for policy 0, policy_version 302102 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:42,158][626795] Updated weights for policy 0, policy_version 302112 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:43,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40413.8, 300 sec: 40182.5). Total num frames: 2474975232. Throughput: 0: 10131.6. Samples: 368720736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:43,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:44,273][626795] Updated weights for policy 0, policy_version 302122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:46,069][626795] Updated weights for policy 0, policy_version 302132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:48,145][626795] Updated weights for policy 0, policy_version 302142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:48,976][24592] Fps is (10 sec: 40140.7, 60 sec: 40413.6, 300 sec: 40182.4). Total num frames: 2475171840. Throughput: 0: 10151.3. Samples: 368782032. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:48,978][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:50,276][626795] Updated weights for policy 0, policy_version 302152 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:52,179][626795] Updated weights for policy 0, policy_version 302162 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:53,975][24592] Fps is (10 sec: 40141.3, 60 sec: 40550.6, 300 sec: 40182.5). Total num frames: 2475376640. Throughput: 0: 10164.0. Samples: 368843562. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:53,977][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:54,345][626795] Updated weights for policy 0, policy_version 302172 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:56,276][626795] Updated weights for policy 0, policy_version 302182 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:01:58,378][626795] Updated weights for policy 0, policy_version 302192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:58,977][24592] Fps is (10 sec: 40954.1, 60 sec: 40549.5, 300 sec: 40182.2). Total num frames: 2475581440. Throughput: 0: 10143.9. Samples: 368873100. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:01:58,980][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:00,412][626795] Updated weights for policy 0, policy_version 302202 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:02,545][626795] Updated weights for policy 0, policy_version 302212 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:03,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40550.4, 300 sec: 40210.2). Total num frames: 2475778048. Throughput: 0: 10090.9. Samples: 368932302. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:03,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:04,566][626795] Updated weights for policy 0, policy_version 302222 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:06,577][626795] Updated weights for policy 0, policy_version 302232 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:08,575][626795] Updated weights for policy 0, policy_version 302242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:08,976][24592] Fps is (10 sec: 39327.9, 60 sec: 40413.7, 300 sec: 40182.4). Total num frames: 2475974656. Throughput: 0: 10060.4. Samples: 368992698. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:08,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:10,756][626795] Updated weights for policy 0, policy_version 302252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:12,702][626795] Updated weights for policy 0, policy_version 302262 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:13,975][24592] Fps is (10 sec: 40140.8, 60 sec: 40413.8, 300 sec: 40182.5). Total num frames: 2476179456. Throughput: 0: 10078.0. Samples: 369022374. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:13,977][24592] Avg episode reward: [(0, '4.923')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:14,850][626795] Updated weights for policy 0, policy_version 302272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:16,680][626795] Updated weights for policy 0, policy_version 302282 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:18,825][626795] Updated weights for policy 0, policy_version 302292 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:18,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40425.5, 300 sec: 40182.5). Total num frames: 2476384256. Throughput: 0: 10077.5. Samples: 369082914. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:18,977][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:20,835][626795] Updated weights for policy 0, policy_version 302302 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:22,762][626795] Updated weights for policy 0, policy_version 302312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:23,976][24592] Fps is (10 sec: 40957.4, 60 sec: 40413.8, 300 sec: 40182.4). Total num frames: 2476589056. Throughput: 0: 10084.3. Samples: 369144576. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:23,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:24,891][626795] Updated weights for policy 0, policy_version 302322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:26,908][626795] Updated weights for policy 0, policy_version 302332 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:28,871][626795] Updated weights for policy 0, policy_version 302342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:28,976][24592] Fps is (10 sec: 40139.7, 60 sec: 40413.7, 300 sec: 40154.7). Total num frames: 2476785664. Throughput: 0: 10079.7. Samples: 369174324. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:28,978][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:31,066][626795] Updated weights for policy 0, policy_version 302352 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:33,095][626795] Updated weights for policy 0, policy_version 302362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:33,975][24592] Fps is (10 sec: 38504.9, 60 sec: 40140.8, 300 sec: 40126.9). Total num frames: 2476974080. Throughput: 0: 10035.7. Samples: 369233634. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:33,977][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:35,292][626795] Updated weights for policy 0, policy_version 302372 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:37,248][626795] Updated weights for policy 0, policy_version 302382 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:38,975][24592] Fps is (10 sec: 39322.6, 60 sec: 40141.0, 300 sec: 40127.0). Total num frames: 2477178880. Throughput: 0: 9987.9. Samples: 369293016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:38,976][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:39,371][626795] Updated weights for policy 0, policy_version 302392 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:41,382][626795] Updated weights for policy 0, policy_version 302402 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:43,454][626795] Updated weights for policy 0, policy_version 302412 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:43,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40140.8, 300 sec: 40154.7). Total num frames: 2477383680. Throughput: 0: 9998.4. Samples: 369323010. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:43,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:45,369][626795] Updated weights for policy 0, policy_version 302422 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:47,454][626795] Updated weights for policy 0, policy_version 302432 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:48,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40141.0, 300 sec: 40126.9). Total num frames: 2477580288. Throughput: 0: 10029.1. Samples: 369383610. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:48,978][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:49,481][626795] Updated weights for policy 0, policy_version 302442 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:51,555][626795] Updated weights for policy 0, policy_version 302452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:53,448][626795] Updated weights for policy 0, policy_version 302462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:53,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40140.8, 300 sec: 40126.9). Total num frames: 2477785088. Throughput: 0: 10037.8. Samples: 369444396. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:53,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:55,714][626795] Updated weights for policy 0, policy_version 302472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:57,578][626795] Updated weights for policy 0, policy_version 302482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:58,976][24592] Fps is (10 sec: 40139.0, 60 sec: 40005.1, 300 sec: 40099.1). Total num frames: 2477981696. Throughput: 0: 10033.6. Samples: 369473892. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:02:58,977][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:02:59,661][626795] Updated weights for policy 0, policy_version 302492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:01,792][626795] Updated weights for policy 0, policy_version 302502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:03,798][626795] Updated weights for policy 0, policy_version 302512 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:03,976][24592] Fps is (10 sec: 39320.1, 60 sec: 40004.0, 300 sec: 40099.2). Total num frames: 2478178304. Throughput: 0: 10013.4. Samples: 369533520. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:03,978][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:03,989][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000302513_2478186496.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:04,156][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000301334_2468528128.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:05,989][626795] Updated weights for policy 0, policy_version 302522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:08,016][626795] Updated weights for policy 0, policy_version 302532 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:08,975][24592] Fps is (10 sec: 39323.3, 60 sec: 40004.3, 300 sec: 40071.4). Total num frames: 2478374912. Throughput: 0: 9968.1. Samples: 369593136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:08,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:09,981][626795] Updated weights for policy 0, policy_version 302542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:12,050][626795] Updated weights for policy 0, policy_version 302552 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:13,975][24592] Fps is (10 sec: 40141.8, 60 sec: 40004.2, 300 sec: 40099.1). Total num frames: 2478579712. Throughput: 0: 9975.0. Samples: 369623196. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:13,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:14,203][626795] Updated weights for policy 0, policy_version 302562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:16,189][626795] Updated weights for policy 0, policy_version 302572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:18,164][626795] Updated weights for policy 0, policy_version 302582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:18,976][24592] Fps is (10 sec: 40140.1, 60 sec: 39867.5, 300 sec: 40071.3). Total num frames: 2478776320. Throughput: 0: 9981.0. Samples: 369682782. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:18,978][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:20,222][626795] Updated weights for policy 0, policy_version 302592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:22,241][626795] Updated weights for policy 0, policy_version 302602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:23,975][24592] Fps is (10 sec: 40141.2, 60 sec: 39868.1, 300 sec: 40071.7). Total num frames: 2478981120. Throughput: 0: 10017.9. Samples: 369743820. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:23,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:24,321][626795] Updated weights for policy 0, policy_version 302612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:26,308][626795] Updated weights for policy 0, policy_version 302622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:27,606][626772] Signal inference workers to stop experience collection... (4800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:27,606][626772] Signal inference workers to resume experience collection... (4800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:27,618][626795] InferenceWorker_p0-w0: stopping experience collection (4800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:27,623][626795] InferenceWorker_p0-w0: resuming experience collection (4800 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:28,366][626795] Updated weights for policy 0, policy_version 302632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:28,975][24592] Fps is (10 sec: 40141.5, 60 sec: 39867.9, 300 sec: 40043.6). Total num frames: 2479177728. Throughput: 0: 10019.7. Samples: 369773898. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:28,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:30,361][626795] Updated weights for policy 0, policy_version 302642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:32,440][626795] Updated weights for policy 0, policy_version 302652 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:33,976][24592] Fps is (10 sec: 40139.3, 60 sec: 40140.5, 300 sec: 40071.3). Total num frames: 2479382528. Throughput: 0: 10023.0. Samples: 369834648. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:33,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:34,507][626795] Updated weights for policy 0, policy_version 302662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:36,669][626795] Updated weights for policy 0, policy_version 302672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:38,753][626795] Updated weights for policy 0, policy_version 302682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:38,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39867.7, 300 sec: 40015.9). Total num frames: 2479570944. Throughput: 0: 9946.8. Samples: 369892002. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:38,976][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:41,220][626795] Updated weights for policy 0, policy_version 302692 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:43,485][626795] Updated weights for policy 0, policy_version 302702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:43,975][24592] Fps is (10 sec: 36865.5, 60 sec: 39458.1, 300 sec: 40099.2). Total num frames: 2479751168. Throughput: 0: 9885.8. Samples: 369918750. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:43,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:45,926][626795] Updated weights for policy 0, policy_version 302712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:48,628][626795] Updated weights for policy 0, policy_version 302722 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:48,975][24592] Fps is (10 sec: 33587.5, 60 sec: 38775.5, 300 sec: 39960.3). Total num frames: 2479906816. Throughput: 0: 9665.8. Samples: 369968478. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:48,978][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:50,913][626795] Updated weights for policy 0, policy_version 302732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:52,892][626795] Updated weights for policy 0, policy_version 302742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:53,975][24592] Fps is (10 sec: 34406.4, 60 sec: 38502.4, 300 sec: 39932.5). Total num frames: 2480095232. Throughput: 0: 9566.1. Samples: 370023612. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:53,977][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:55,034][626795] Updated weights for policy 0, policy_version 302752 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:57,089][626795] Updated weights for policy 0, policy_version 302762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:58,975][24592] Fps is (10 sec: 39321.4, 60 sec: 38639.2, 300 sec: 39904.8). Total num frames: 2480300032. Throughput: 0: 9538.3. Samples: 370052418. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:03:58,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:03:59,148][626795] Updated weights for policy 0, policy_version 302772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:01,253][626795] Updated weights for policy 0, policy_version 302782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:03,162][626795] Updated weights for policy 0, policy_version 302792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:03,975][24592] Fps is (10 sec: 40140.9, 60 sec: 38639.2, 300 sec: 39904.8). Total num frames: 2480496640. Throughput: 0: 9579.1. Samples: 370113840. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:03,977][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:05,178][626795] Updated weights for policy 0, policy_version 302802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:07,300][626795] Updated weights for policy 0, policy_version 302812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:08,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38775.5, 300 sec: 39904.8). Total num frames: 2480701440. Throughput: 0: 9548.0. Samples: 370173480. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:08,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:09,421][626795] Updated weights for policy 0, policy_version 302822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:11,682][626795] Updated weights for policy 0, policy_version 302832 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:13,975][24592] Fps is (10 sec: 37683.1, 60 sec: 38229.4, 300 sec: 39821.5). Total num frames: 2480873472. Throughput: 0: 9463.9. Samples: 370199772. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:13,977][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:14,121][626795] Updated weights for policy 0, policy_version 302842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:16,364][626795] Updated weights for policy 0, policy_version 302852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:18,546][626795] Updated weights for policy 0, policy_version 302862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:18,975][24592] Fps is (10 sec: 35225.8, 60 sec: 37956.4, 300 sec: 39738.2). Total num frames: 2481053696. Throughput: 0: 9299.2. Samples: 370253106. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:18,976][24592] Avg episode reward: [(0, '4.892')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:20,588][626795] Updated weights for policy 0, policy_version 302872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:22,575][626795] Updated weights for policy 0, policy_version 302882 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:23,976][24592] Fps is (10 sec: 38501.9, 60 sec: 37956.2, 300 sec: 39765.9). Total num frames: 2481258496. Throughput: 0: 9362.5. Samples: 370313316. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:23,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:24,715][626795] Updated weights for policy 0, policy_version 302892 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:26,678][626795] Updated weights for policy 0, policy_version 302902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:28,678][626795] Updated weights for policy 0, policy_version 302912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 38092.8, 300 sec: 39765.9). Total num frames: 2481463296. Throughput: 0: 9437.9. Samples: 370343454. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:28,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:30,702][626795] Updated weights for policy 0, policy_version 302922 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:32,796][626795] Updated weights for policy 0, policy_version 302932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:33,976][24592] Fps is (10 sec: 40958.8, 60 sec: 38092.8, 300 sec: 39765.9). Total num frames: 2481668096. Throughput: 0: 9694.6. Samples: 370404738. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:33,978][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:34,690][626795] Updated weights for policy 0, policy_version 302942 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:36,741][626795] Updated weights for policy 0, policy_version 302952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:38,803][626795] Updated weights for policy 0, policy_version 302962 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:38,975][24592] Fps is (10 sec: 40959.9, 60 sec: 38365.8, 300 sec: 39793.7). Total num frames: 2481872896. Throughput: 0: 9819.7. Samples: 370465500. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:38,980][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:40,916][626795] Updated weights for policy 0, policy_version 302972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:43,088][626795] Updated weights for policy 0, policy_version 302982 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:43,975][24592] Fps is (10 sec: 39323.4, 60 sec: 38502.4, 300 sec: 39738.1). Total num frames: 2482061312. Throughput: 0: 9829.7. Samples: 370494756. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:43,976][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:45,081][626795] Updated weights for policy 0, policy_version 302992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:47,007][626795] Updated weights for policy 0, policy_version 303002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:48,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39321.5, 300 sec: 39766.0). Total num frames: 2482266112. Throughput: 0: 9795.2. Samples: 370554624. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:48,977][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:49,098][626795] Updated weights for policy 0, policy_version 303012 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:51,150][626795] Updated weights for policy 0, policy_version 303022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:53,049][626795] Updated weights for policy 0, policy_version 303032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:53,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39594.7, 300 sec: 39793.7). Total num frames: 2482470912. Throughput: 0: 9824.8. Samples: 370615596. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:53,977][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:55,240][626795] Updated weights for policy 0, policy_version 303042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:57,157][626795] Updated weights for policy 0, policy_version 303052 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:58,975][24592] Fps is (10 sec: 40960.2, 60 sec: 39594.7, 300 sec: 39793.7). Total num frames: 2482675712. Throughput: 0: 9908.4. Samples: 370645650. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:04:58,976][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:04:59,157][626795] Updated weights for policy 0, policy_version 303062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:01,126][626795] Updated weights for policy 0, policy_version 303072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:03,142][626795] Updated weights for policy 0, policy_version 303082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39731.2, 300 sec: 39793.7). Total num frames: 2482880512. Throughput: 0: 10106.1. Samples: 370707882. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:03,977][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000303086_2482880512.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:04,064][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000301923_2473353216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:05,101][626795] Updated weights for policy 0, policy_version 303092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:07,211][626795] Updated weights for policy 0, policy_version 303102 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39731.2, 300 sec: 39793.7). Total num frames: 2483085312. Throughput: 0: 10141.5. Samples: 370769682. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:08,976][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:09,122][626795] Updated weights for policy 0, policy_version 303112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:11,137][626795] Updated weights for policy 0, policy_version 303122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:13,293][626795] Updated weights for policy 0, policy_version 303132 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:13,975][24592] Fps is (10 sec: 40140.5, 60 sec: 40140.8, 300 sec: 39766.2). Total num frames: 2483281920. Throughput: 0: 10131.5. Samples: 370799370. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:13,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:15,781][626795] Updated weights for policy 0, policy_version 303142 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:17,651][626795] Updated weights for policy 0, policy_version 303152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:18,975][24592] Fps is (10 sec: 38502.1, 60 sec: 40277.3, 300 sec: 39738.1). Total num frames: 2483470336. Throughput: 0: 9991.2. Samples: 370854336. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:18,976][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:19,804][626795] Updated weights for policy 0, policy_version 303162 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:21,901][626795] Updated weights for policy 0, policy_version 303172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:23,851][626795] Updated weights for policy 0, policy_version 303182 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:23,976][24592] Fps is (10 sec: 38502.2, 60 sec: 40140.9, 300 sec: 39738.2). Total num frames: 2483666944. Throughput: 0: 10002.1. Samples: 370915596. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:23,978][24592] Avg episode reward: [(0, '5.039')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:25,895][626795] Updated weights for policy 0, policy_version 303192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:27,823][626795] Updated weights for policy 0, policy_version 303202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:28,975][24592] Fps is (10 sec: 40141.1, 60 sec: 40140.8, 300 sec: 39766.2). Total num frames: 2483871744. Throughput: 0: 10018.5. Samples: 370945590. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:28,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:29,965][626795] Updated weights for policy 0, policy_version 303212 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:31,866][626795] Updated weights for policy 0, policy_version 303222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:33,975][24592] Fps is (10 sec: 39322.0, 60 sec: 39868.1, 300 sec: 39710.4). Total num frames: 2484060160. Throughput: 0: 10035.9. Samples: 371006238. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:33,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:34,425][626795] Updated weights for policy 0, policy_version 303232 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:36,350][626795] Updated weights for policy 0, policy_version 303242 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:38,444][626795] Updated weights for policy 0, policy_version 303252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:38,975][24592] Fps is (10 sec: 38502.4, 60 sec: 39731.3, 300 sec: 39682.6). Total num frames: 2484256768. Throughput: 0: 9932.0. Samples: 371062536. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:38,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:40,604][626795] Updated weights for policy 0, policy_version 303262 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:42,733][626795] Updated weights for policy 0, policy_version 303272 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:43,978][24592] Fps is (10 sec: 39313.1, 60 sec: 39866.3, 300 sec: 39682.3). Total num frames: 2484453376. Throughput: 0: 9921.9. Samples: 371092158. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:43,980][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:44,749][626795] Updated weights for policy 0, policy_version 303282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:46,852][626795] Updated weights for policy 0, policy_version 303292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:48,780][626795] Updated weights for policy 0, policy_version 303302 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:48,975][24592] Fps is (10 sec: 39321.4, 60 sec: 39731.2, 300 sec: 39682.6). Total num frames: 2484649984. Throughput: 0: 9847.6. Samples: 371151024. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:48,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:50,873][626795] Updated weights for policy 0, policy_version 303312 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:52,880][626795] Updated weights for policy 0, policy_version 303322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:53,976][24592] Fps is (10 sec: 40149.0, 60 sec: 39731.1, 300 sec: 39682.6). Total num frames: 2484854784. Throughput: 0: 9831.3. Samples: 371212092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:53,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:54,974][626795] Updated weights for policy 0, policy_version 303332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:56,996][626795] Updated weights for policy 0, policy_version 303342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:58,975][24592] Fps is (10 sec: 40140.9, 60 sec: 39594.7, 300 sec: 39682.6). Total num frames: 2485051392. Throughput: 0: 9827.7. Samples: 371241618. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:05:58,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:05:58,981][626795] Updated weights for policy 0, policy_version 303352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:01,121][626795] Updated weights for policy 0, policy_version 303362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:03,036][626795] Updated weights for policy 0, policy_version 303372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:03,975][24592] Fps is (10 sec: 40141.2, 60 sec: 39594.6, 300 sec: 39682.6). Total num frames: 2485256192. Throughput: 0: 9945.5. Samples: 371301882. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:03,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:05,196][626795] Updated weights for policy 0, policy_version 303382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:07,139][626795] Updated weights for policy 0, policy_version 303392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39594.7, 300 sec: 39682.6). Total num frames: 2485460992. Throughput: 0: 9942.4. Samples: 371363004. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:08,977][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:09,210][626795] Updated weights for policy 0, policy_version 303402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:11,245][626795] Updated weights for policy 0, policy_version 303412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:13,235][626795] Updated weights for policy 0, policy_version 303422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:13,976][24592] Fps is (10 sec: 40140.0, 60 sec: 39594.6, 300 sec: 39657.1). Total num frames: 2485657600. Throughput: 0: 9944.1. Samples: 371393076. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:13,977][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:15,393][626795] Updated weights for policy 0, policy_version 303432 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:17,457][626795] Updated weights for policy 0, policy_version 303442 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:18,975][24592] Fps is (10 sec: 39321.4, 60 sec: 39731.2, 300 sec: 39627.1). Total num frames: 2485854208. Throughput: 0: 9908.9. Samples: 371452140. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:18,976][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:19,529][626795] Updated weights for policy 0, policy_version 303452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:21,648][626795] Updated weights for policy 0, policy_version 303462 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:23,682][626795] Updated weights for policy 0, policy_version 303472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:23,975][24592] Fps is (10 sec: 39322.4, 60 sec: 39731.3, 300 sec: 39627.1). Total num frames: 2486050816. Throughput: 0: 9975.7. Samples: 371511444. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:23,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:25,866][626795] Updated weights for policy 0, policy_version 303482 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:27,825][626795] Updated weights for policy 0, policy_version 303492 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:28,975][24592] Fps is (10 sec: 39321.8, 60 sec: 39594.7, 300 sec: 39599.3). Total num frames: 2486247424. Throughput: 0: 9967.8. Samples: 371540688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:28,976][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:29,984][626795] Updated weights for policy 0, policy_version 303502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:31,830][626795] Updated weights for policy 0, policy_version 303512 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:33,975][24592] Fps is (10 sec: 38502.6, 60 sec: 39594.7, 300 sec: 39543.8). Total num frames: 2486435840. Throughput: 0: 9944.8. Samples: 371598540. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:33,976][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:34,322][626795] Updated weights for policy 0, policy_version 303522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:36,405][626795] Updated weights for policy 0, policy_version 303532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:38,317][626795] Updated weights for policy 0, policy_version 303542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:38,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39731.2, 300 sec: 39543.8). Total num frames: 2486640640. Throughput: 0: 9918.0. Samples: 371658402. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:38,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:40,315][626795] Updated weights for policy 0, policy_version 303552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:42,353][626795] Updated weights for policy 0, policy_version 303562 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:43,975][24592] Fps is (10 sec: 40959.6, 60 sec: 39869.1, 300 sec: 39571.6). Total num frames: 2486845440. Throughput: 0: 9937.0. Samples: 371688786. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:43,976][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:44,391][626795] Updated weights for policy 0, policy_version 303572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:46,474][626795] Updated weights for policy 0, policy_version 303582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:48,555][626795] Updated weights for policy 0, policy_version 303592 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:48,976][24592] Fps is (10 sec: 39319.7, 60 sec: 39730.9, 300 sec: 39515.9). Total num frames: 2487033856. Throughput: 0: 9922.4. Samples: 371748396. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:48,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:50,690][626795] Updated weights for policy 0, policy_version 303602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:52,707][626795] Updated weights for policy 0, policy_version 303612 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:53,975][24592] Fps is (10 sec: 39321.5, 60 sec: 39731.2, 300 sec: 39516.2). Total num frames: 2487238656. Throughput: 0: 9877.7. Samples: 371807502. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:53,976][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:54,770][626795] Updated weights for policy 0, policy_version 303622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:56,834][626795] Updated weights for policy 0, policy_version 303632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:06:58,771][626795] Updated weights for policy 0, policy_version 303642 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:58,976][24592] Fps is (10 sec: 40140.2, 60 sec: 39730.8, 300 sec: 39515.9). Total num frames: 2487435264. Throughput: 0: 9873.2. Samples: 371837376. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:06:58,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:00,905][626795] Updated weights for policy 0, policy_version 303652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:02,917][626795] Updated weights for policy 0, policy_version 303662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:03,975][24592] Fps is (10 sec: 40140.7, 60 sec: 39731.2, 300 sec: 39543.8). Total num frames: 2487640064. Throughput: 0: 9895.6. Samples: 371897442. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:03,976][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000303667_2487640064.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:04,126][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000302513_2478186496.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:05,111][626795] Updated weights for policy 0, policy_version 303672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:07,251][626795] Updated weights for policy 0, policy_version 303682 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:08,975][24592] Fps is (10 sec: 39323.7, 60 sec: 39458.1, 300 sec: 39488.2). Total num frames: 2487828480. Throughput: 0: 9884.0. Samples: 371956224. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:08,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:09,170][626795] Updated weights for policy 0, policy_version 303692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:11,280][626795] Updated weights for policy 0, policy_version 303702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:13,473][626795] Updated weights for policy 0, policy_version 303712 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:13,975][24592] Fps is (10 sec: 38502.9, 60 sec: 39458.3, 300 sec: 39460.4). Total num frames: 2488025088. Throughput: 0: 9898.4. Samples: 371986116. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:13,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:15,728][626795] Updated weights for policy 0, policy_version 303722 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:17,653][626795] Updated weights for policy 0, policy_version 303732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:18,976][24592] Fps is (10 sec: 38501.4, 60 sec: 39321.4, 300 sec: 39405.0). Total num frames: 2488213504. Throughput: 0: 9893.8. Samples: 372043764. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:18,977][24592] Avg episode reward: [(0, '4.463')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:19,891][626795] Updated weights for policy 0, policy_version 303742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:21,858][626795] Updated weights for policy 0, policy_version 303752 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:23,976][24592] Fps is (10 sec: 38499.6, 60 sec: 39321.2, 300 sec: 39404.8). Total num frames: 2488410112. Throughput: 0: 9855.7. Samples: 372101916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:23,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:24,061][626795] Updated weights for policy 0, policy_version 303762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:26,126][626795] Updated weights for policy 0, policy_version 303772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:28,109][626795] Updated weights for policy 0, policy_version 303782 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:28,975][24592] Fps is (10 sec: 40142.1, 60 sec: 39458.1, 300 sec: 39460.4). Total num frames: 2488614912. Throughput: 0: 9833.5. Samples: 372131292. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:28,976][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:30,275][626795] Updated weights for policy 0, policy_version 303792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:32,352][626795] Updated weights for policy 0, policy_version 303802 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:33,975][24592] Fps is (10 sec: 40143.6, 60 sec: 39594.6, 300 sec: 39432.7). Total num frames: 2488811520. Throughput: 0: 9845.7. Samples: 372191448. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:33,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:34,232][626795] Updated weights for policy 0, policy_version 303812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:36,344][626795] Updated weights for policy 0, policy_version 303822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:38,389][626795] Updated weights for policy 0, policy_version 303832 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:38,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39594.6, 300 sec: 39432.7). Total num frames: 2489016320. Throughput: 0: 9875.9. Samples: 372251916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:38,976][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:40,381][626795] Updated weights for policy 0, policy_version 303842 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:42,512][626795] Updated weights for policy 0, policy_version 303852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:43,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39458.2, 300 sec: 39432.7). Total num frames: 2489212928. Throughput: 0: 9873.7. Samples: 372281688. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:43,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:44,516][626795] Updated weights for policy 0, policy_version 303862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:46,525][626795] Updated weights for policy 0, policy_version 303872 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:48,508][626795] Updated weights for policy 0, policy_version 303882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:48,976][24592] Fps is (10 sec: 40140.0, 60 sec: 39731.4, 300 sec: 39432.6). Total num frames: 2489417728. Throughput: 0: 9885.8. Samples: 372342306. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:48,976][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:50,628][626795] Updated weights for policy 0, policy_version 303892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:52,706][626795] Updated weights for policy 0, policy_version 303902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:53,977][24592] Fps is (10 sec: 40137.6, 60 sec: 39594.2, 300 sec: 39432.6). Total num frames: 2489614336. Throughput: 0: 9889.2. Samples: 372401244. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:53,979][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:54,921][626795] Updated weights for policy 0, policy_version 303912 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:57,073][626795] Updated weights for policy 0, policy_version 303922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:58,976][24592] Fps is (10 sec: 37681.1, 60 sec: 39321.5, 300 sec: 39377.1). Total num frames: 2489794560. Throughput: 0: 9834.9. Samples: 372428694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:07:58,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:07:59,465][626795] Updated weights for policy 0, policy_version 303932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:01,864][626795] Updated weights for policy 0, policy_version 303942 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:03,871][626795] Updated weights for policy 0, policy_version 303952 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:03,975][24592] Fps is (10 sec: 36047.8, 60 sec: 38912.1, 300 sec: 39321.6). Total num frames: 2489974784. Throughput: 0: 9739.0. Samples: 372482016. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:03,976][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:05,962][626795] Updated weights for policy 0, policy_version 303962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:08,090][626795] Updated weights for policy 0, policy_version 303972 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:08,975][24592] Fps is (10 sec: 37686.0, 60 sec: 39048.6, 300 sec: 39293.8). Total num frames: 2490171392. Throughput: 0: 9748.4. Samples: 372540588. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:08,979][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:10,239][626795] Updated weights for policy 0, policy_version 303982 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:12,198][626795] Updated weights for policy 0, policy_version 303992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:13,975][24592] Fps is (10 sec: 39321.3, 60 sec: 39048.5, 300 sec: 39293.9). Total num frames: 2490368000. Throughput: 0: 9756.2. Samples: 372570324. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:13,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:14,245][626795] Updated weights for policy 0, policy_version 304002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:16,389][626795] Updated weights for policy 0, policy_version 304012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:18,269][626795] Updated weights for policy 0, policy_version 304022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:18,975][24592] Fps is (10 sec: 40141.0, 60 sec: 39321.8, 300 sec: 39293.8). Total num frames: 2490572800. Throughput: 0: 9768.7. Samples: 372631038. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:18,976][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:20,362][626795] Updated weights for policy 0, policy_version 304032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:22,761][626795] Updated weights for policy 0, policy_version 304042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:23,976][24592] Fps is (10 sec: 38501.6, 60 sec: 39048.8, 300 sec: 39238.3). Total num frames: 2490753024. Throughput: 0: 9683.5. Samples: 372687678. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:23,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:24,920][626795] Updated weights for policy 0, policy_version 304052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:26,942][626795] Updated weights for policy 0, policy_version 304062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:28,976][24592] Fps is (10 sec: 38501.1, 60 sec: 39048.3, 300 sec: 39238.3). Total num frames: 2490957824. Throughput: 0: 9681.1. Samples: 372717342. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:28,977][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:28,981][626795] Updated weights for policy 0, policy_version 304072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:30,986][626795] Updated weights for policy 0, policy_version 304082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:33,059][626795] Updated weights for policy 0, policy_version 304092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:33,975][24592] Fps is (10 sec: 40141.9, 60 sec: 39048.5, 300 sec: 39266.1). Total num frames: 2491154432. Throughput: 0: 9670.4. Samples: 372777474. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:33,977][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:35,531][626795] Updated weights for policy 0, policy_version 304102 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:37,542][626795] Updated weights for policy 0, policy_version 304112 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:38,975][24592] Fps is (10 sec: 37684.2, 60 sec: 38638.9, 300 sec: 39266.1). Total num frames: 2491334656. Throughput: 0: 9607.6. Samples: 372833580. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:38,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:39,622][626795] Updated weights for policy 0, policy_version 304122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:41,678][626795] Updated weights for policy 0, policy_version 304132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:43,582][626795] Updated weights for policy 0, policy_version 304142 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:43,976][24592] Fps is (10 sec: 38501.6, 60 sec: 38775.3, 300 sec: 39432.6). Total num frames: 2491539456. Throughput: 0: 9664.0. Samples: 372863568. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:43,978][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:45,803][626795] Updated weights for policy 0, policy_version 304152 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:47,693][626795] Updated weights for policy 0, policy_version 304162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:48,976][24592] Fps is (10 sec: 40139.4, 60 sec: 38638.8, 300 sec: 39460.4). Total num frames: 2491736064. Throughput: 0: 9814.9. Samples: 372923688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:48,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:49,824][626795] Updated weights for policy 0, policy_version 304172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:51,848][626795] Updated weights for policy 0, policy_version 304182 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:53,840][626795] Updated weights for policy 0, policy_version 304192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:53,976][24592] Fps is (10 sec: 40141.1, 60 sec: 38775.9, 300 sec: 39460.4). Total num frames: 2491940864. Throughput: 0: 9856.0. Samples: 372984108. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:53,978][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:55,984][626795] Updated weights for policy 0, policy_version 304202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:08:58,046][626795] Updated weights for policy 0, policy_version 304212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:58,975][24592] Fps is (10 sec: 40141.9, 60 sec: 39049.0, 300 sec: 39460.4). Total num frames: 2492137472. Throughput: 0: 9843.2. Samples: 373013268. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:08:58,996][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:00,142][626795] Updated weights for policy 0, policy_version 304222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:02,253][626795] Updated weights for policy 0, policy_version 304232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:03,976][24592] Fps is (10 sec: 39321.3, 60 sec: 39321.4, 300 sec: 39432.7). Total num frames: 2492334080. Throughput: 0: 9792.9. Samples: 373071720. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:03,977][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000304240_2492334080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:04,133][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000303086_2482880512.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:04,528][626795] Updated weights for policy 0, policy_version 304242 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:04,553][626772] Signal inference workers to stop experience collection... (4850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:04,554][626772] Signal inference workers to resume experience collection... (4850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:04,561][626795] InferenceWorker_p0-w0: stopping experience collection (4850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:04,564][626795] InferenceWorker_p0-w0: resuming experience collection (4850 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:06,403][626795] Updated weights for policy 0, policy_version 304252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:08,629][626795] Updated weights for policy 0, policy_version 304262 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:08,976][24592] Fps is (10 sec: 38501.9, 60 sec: 39184.9, 300 sec: 39488.2). Total num frames: 2492522496. Throughput: 0: 9829.2. Samples: 373129992. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:08,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:10,818][626795] Updated weights for policy 0, policy_version 304272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:12,775][626795] Updated weights for policy 0, policy_version 304282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:13,975][24592] Fps is (10 sec: 38503.1, 60 sec: 39185.1, 300 sec: 39543.7). Total num frames: 2492719104. Throughput: 0: 9811.0. Samples: 373158834. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:13,976][24592] Avg episode reward: [(0, '4.877')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:14,880][626795] Updated weights for policy 0, policy_version 304292 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:16,925][626795] Updated weights for policy 0, policy_version 304302 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:18,979][24592] Fps is (10 sec: 38488.1, 60 sec: 38909.4, 300 sec: 39487.7). Total num frames: 2492907520. Throughput: 0: 9799.9. Samples: 373218510. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:18,980][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:19,276][626795] Updated weights for policy 0, policy_version 304312 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:21,342][626795] Updated weights for policy 0, policy_version 304322 (0.0039)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:23,376][626795] Updated weights for policy 0, policy_version 304332 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:23,975][24592] Fps is (10 sec: 38502.6, 60 sec: 39185.2, 300 sec: 39460.5). Total num frames: 2493104128. Throughput: 0: 9818.7. Samples: 373275420. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:23,976][24592] Avg episode reward: [(0, '4.950')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:25,755][626795] Updated weights for policy 0, policy_version 304342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:28,152][626795] Updated weights for policy 0, policy_version 304352 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:28,975][24592] Fps is (10 sec: 37698.2, 60 sec: 38775.7, 300 sec: 39377.2). Total num frames: 2493284352. Throughput: 0: 9752.7. Samples: 373302438. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:28,976][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:30,130][626795] Updated weights for policy 0, policy_version 304362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:32,337][626795] Updated weights for policy 0, policy_version 304372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:33,978][24592] Fps is (10 sec: 37682.3, 60 sec: 38775.3, 300 sec: 39349.3). Total num frames: 2493480960. Throughput: 0: 9647.0. Samples: 373357800. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:33,980][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:34,575][626795] Updated weights for policy 0, policy_version 304382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:37,375][626795] Updated weights for policy 0, policy_version 304392 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:38,975][24592] Fps is (10 sec: 36045.0, 60 sec: 38502.5, 300 sec: 39266.1). Total num frames: 2493644800. Throughput: 0: 9455.1. Samples: 373409586. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:38,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:39,424][626795] Updated weights for policy 0, policy_version 304402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:41,674][626795] Updated weights for policy 0, policy_version 304412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:43,976][24592] Fps is (10 sec: 31946.7, 60 sec: 37682.8, 300 sec: 39099.3). Total num frames: 2493800448. Throughput: 0: 9374.5. Samples: 373435128. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:43,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:44,599][626795] Updated weights for policy 0, policy_version 304422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:47,164][626795] Updated weights for policy 0, policy_version 304432 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:48,977][24592] Fps is (10 sec: 31944.5, 60 sec: 37136.5, 300 sec: 38960.4). Total num frames: 2493964288. Throughput: 0: 9111.3. Samples: 373481736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:48,978][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:49,814][626795] Updated weights for policy 0, policy_version 304442 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:52,346][626795] Updated weights for policy 0, policy_version 304452 (0.0041)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:53,977][24592] Fps is (10 sec: 31946.5, 60 sec: 36317.0, 300 sec: 38793.8). Total num frames: 2494119936. Throughput: 0: 8882.9. Samples: 373529736. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:53,979][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:54,774][626795] Updated weights for policy 0, policy_version 304462 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:09:57,384][626795] Updated weights for policy 0, policy_version 304472 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:58,975][24592] Fps is (10 sec: 31952.9, 60 sec: 35771.8, 300 sec: 38655.1). Total num frames: 2494283776. Throughput: 0: 8773.2. Samples: 373553628. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:09:58,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:00,042][626795] Updated weights for policy 0, policy_version 304482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:02,635][626795] Updated weights for policy 0, policy_version 304492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:03,976][24592] Fps is (10 sec: 31134.1, 60 sec: 34952.6, 300 sec: 38460.7). Total num frames: 2494431232. Throughput: 0: 8500.7. Samples: 373601010. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:03,979][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:05,222][626795] Updated weights for policy 0, policy_version 304502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:07,866][626795] Updated weights for policy 0, policy_version 304512 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:08,977][24592] Fps is (10 sec: 30310.7, 60 sec: 34406.6, 300 sec: 38321.9). Total num frames: 2494586880. Throughput: 0: 8256.0. Samples: 373646940. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:08,979][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:10,538][626795] Updated weights for policy 0, policy_version 304522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:13,097][626795] Updated weights for policy 0, policy_version 304532 (0.0045)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:13,976][24592] Fps is (10 sec: 31948.3, 60 sec: 33860.1, 300 sec: 38238.6). Total num frames: 2494750720. Throughput: 0: 8173.3. Samples: 373670238. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:13,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:15,702][626795] Updated weights for policy 0, policy_version 304542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:17,956][626795] Updated weights for policy 0, policy_version 304552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:18,978][24592] Fps is (10 sec: 32758.7, 60 sec: 33451.3, 300 sec: 38127.2). Total num frames: 2494914560. Throughput: 0: 8078.4. Samples: 373721346. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:18,979][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:20,812][626795] Updated weights for policy 0, policy_version 304562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:22,773][626795] Updated weights for policy 0, policy_version 304572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:23,975][24592] Fps is (10 sec: 34407.5, 60 sec: 33177.6, 300 sec: 38044.2). Total num frames: 2495094784. Throughput: 0: 8077.3. Samples: 373773066. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:23,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:24,915][626795] Updated weights for policy 0, policy_version 304582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:26,965][626795] Updated weights for policy 0, policy_version 304592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:28,852][626795] Updated weights for policy 0, policy_version 304602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:28,976][24592] Fps is (10 sec: 38508.7, 60 sec: 33586.6, 300 sec: 38099.6). Total num frames: 2495299584. Throughput: 0: 8167.7. Samples: 373802676. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:28,978][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:31,468][626795] Updated weights for policy 0, policy_version 304612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:33,959][626795] Updated weights for policy 0, policy_version 304622 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:33,976][24592] Fps is (10 sec: 36863.2, 60 sec: 33041.1, 300 sec: 37988.6). Total num frames: 2495463424. Throughput: 0: 8237.3. Samples: 373852404. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:33,978][24592] Avg episode reward: [(0, '5.057')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:36,526][626795] Updated weights for policy 0, policy_version 304632 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:38,638][626795] Updated weights for policy 0, policy_version 304642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:38,975][24592] Fps is (10 sec: 33590.9, 60 sec: 33177.6, 300 sec: 37905.6). Total num frames: 2495635456. Throughput: 0: 8398.4. Samples: 373907652. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:38,976][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:41,292][626795] Updated weights for policy 0, policy_version 304652 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:43,754][626795] Updated weights for policy 0, policy_version 304662 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:43,976][24592] Fps is (10 sec: 32767.6, 60 sec: 33177.9, 300 sec: 37766.5). Total num frames: 2495791104. Throughput: 0: 8385.0. Samples: 373930956. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:43,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:46,284][626795] Updated weights for policy 0, policy_version 304672 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:48,873][626795] Updated weights for policy 0, policy_version 304682 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:48,975][24592] Fps is (10 sec: 31948.7, 60 sec: 33178.3, 300 sec: 37627.7). Total num frames: 2495954944. Throughput: 0: 8422.0. Samples: 373980000. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:48,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:51,479][626795] Updated weights for policy 0, policy_version 304692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:53,959][626795] Updated weights for policy 0, policy_version 304702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:53,975][24592] Fps is (10 sec: 32769.1, 60 sec: 33315.0, 300 sec: 37516.6). Total num frames: 2496118784. Throughput: 0: 8472.0. Samples: 374028180. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:53,976][24592] Avg episode reward: [(0, '4.972')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:56,601][626795] Updated weights for policy 0, policy_version 304712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:58,977][24592] Fps is (10 sec: 31944.6, 60 sec: 33176.8, 300 sec: 37349.8). Total num frames: 2496274432. Throughput: 0: 8482.9. Samples: 374051976. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:10:58,980][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:10:59,170][626795] Updated weights for policy 0, policy_version 304722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:01,556][626795] Updated weights for policy 0, policy_version 304732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:03,975][24592] Fps is (10 sec: 31948.8, 60 sec: 33450.8, 300 sec: 37211.1). Total num frames: 2496438272. Throughput: 0: 8428.0. Samples: 374100582. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:03,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000304741_2496438272.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:04,148][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000303667_2487640064.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:04,261][626795] Updated weights for policy 0, policy_version 304742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:07,128][626795] Updated weights for policy 0, policy_version 304752 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:08,976][24592] Fps is (10 sec: 31133.4, 60 sec: 33314.0, 300 sec: 37044.5). Total num frames: 2496585728. Throughput: 0: 8266.2. Samples: 374145048. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:08,976][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:09,829][626795] Updated weights for policy 0, policy_version 304762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:11,949][626795] Updated weights for policy 0, policy_version 304772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:13,975][24592] Fps is (10 sec: 32768.1, 60 sec: 33587.4, 300 sec: 36989.0). Total num frames: 2496765952. Throughput: 0: 8171.7. Samples: 374170392. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:13,976][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:14,180][626795] Updated weights for policy 0, policy_version 304782 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:16,718][626795] Updated weights for policy 0, policy_version 304792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:18,975][24592] Fps is (10 sec: 34407.0, 60 sec: 33588.7, 300 sec: 36877.9). Total num frames: 2496929792. Throughput: 0: 8231.9. Samples: 374222838. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:18,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:19,146][626795] Updated weights for policy 0, policy_version 304802 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:21,862][626795] Updated weights for policy 0, policy_version 304812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:23,976][24592] Fps is (10 sec: 32767.4, 60 sec: 33314.1, 300 sec: 36766.8). Total num frames: 2497093632. Throughput: 0: 8090.0. Samples: 374271702. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:23,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:24,241][626795] Updated weights for policy 0, policy_version 304822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:26,751][626795] Updated weights for policy 0, policy_version 304832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:28,975][24592] Fps is (10 sec: 31948.8, 60 sec: 32495.5, 300 sec: 36655.7). Total num frames: 2497249280. Throughput: 0: 8105.8. Samples: 374295714. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:28,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:29,476][626795] Updated weights for policy 0, policy_version 304842 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:31,590][626795] Updated weights for policy 0, policy_version 304852 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:33,946][626795] Updated weights for policy 0, policy_version 304862 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:33,975][24592] Fps is (10 sec: 33587.5, 60 sec: 32768.1, 300 sec: 36572.4). Total num frames: 2497429504. Throughput: 0: 8123.6. Samples: 374345562. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:33,977][24592] Avg episode reward: [(0, '4.316')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:36,073][626795] Updated weights for policy 0, policy_version 304872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:38,483][626795] Updated weights for policy 0, policy_version 304882 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:38,975][24592] Fps is (10 sec: 35225.2, 60 sec: 32768.0, 300 sec: 36461.3). Total num frames: 2497601536. Throughput: 0: 8271.2. Samples: 374400384. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:38,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:40,691][626795] Updated weights for policy 0, policy_version 304892 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:43,888][626795] Updated weights for policy 0, policy_version 304902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:43,987][24592] Fps is (10 sec: 32731.0, 60 sec: 32762.0, 300 sec: 36348.9). Total num frames: 2497757184. Throughput: 0: 8354.8. Samples: 374428026. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:43,988][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:48,547][626795] Updated weights for policy 0, policy_version 304912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:48,975][24592] Fps is (10 sec: 23757.1, 60 sec: 31402.7, 300 sec: 35933.7). Total num frames: 2497839104. Throughput: 0: 7910.9. Samples: 374456574. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:48,976][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:53,335][626795] Updated weights for policy 0, policy_version 304922 (0.0033)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:53,979][24592] Fps is (10 sec: 17216.3, 60 sec: 30172.0, 300 sec: 35572.3). Total num frames: 2497929216. Throughput: 0: 7508.1. Samples: 374482938. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:53,998][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:11:58,354][626795] Updated weights for policy 0, policy_version 304932 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:59,012][24592] Fps is (10 sec: 16369.8, 60 sec: 28805.1, 300 sec: 35127.4). Total num frames: 2498002944. Throughput: 0: 7220.3. Samples: 374495370. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:11:59,027][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:02,916][626795] Updated weights for policy 0, policy_version 304942 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:03,984][24592] Fps is (10 sec: 17195.5, 60 sec: 27712.5, 300 sec: 34822.0). Total num frames: 2498101248. Throughput: 0: 6621.6. Samples: 374520864. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:03,987][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:07,911][626795] Updated weights for policy 0, policy_version 304952 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:08,975][24592] Fps is (10 sec: 18038.0, 60 sec: 26624.1, 300 sec: 34434.2). Total num frames: 2498183168. Throughput: 0: 6096.7. Samples: 374546052. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:08,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:13,110][626795] Updated weights for policy 0, policy_version 304962 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:13,976][24592] Fps is (10 sec: 15577.1, 60 sec: 24848.9, 300 sec: 34045.4). Total num frames: 2498256896. Throughput: 0: 5800.2. Samples: 374556726. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:13,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:17,879][626795] Updated weights for policy 0, policy_version 304972 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:18,985][24592] Fps is (10 sec: 16368.8, 60 sec: 23616.6, 300 sec: 33683.4). Total num frames: 2498347008. Throughput: 0: 5267.5. Samples: 374582646. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:18,985][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:22,685][626795] Updated weights for policy 0, policy_version 304982 (0.0049)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:23,983][24592] Fps is (10 sec: 17190.8, 60 sec: 22252.2, 300 sec: 33267.0). Total num frames: 2498428928. Throughput: 0: 4630.2. Samples: 374608776. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:23,986][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:27,263][626795] Updated weights for policy 0, policy_version 304992 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:28,976][24592] Fps is (10 sec: 17218.7, 60 sec: 21162.6, 300 sec: 32906.8). Total num frames: 2498519040. Throughput: 0: 4314.0. Samples: 374622108. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:28,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:32,177][626795] Updated weights for policy 0, policy_version 305002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:33,975][24592] Fps is (10 sec: 17216.2, 60 sec: 19524.3, 300 sec: 32490.3). Total num frames: 2498600960. Throughput: 0: 4228.0. Samples: 374646834. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:33,977][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:36,822][626795] Updated weights for policy 0, policy_version 305012 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:38,975][24592] Fps is (10 sec: 17203.5, 60 sec: 18158.9, 300 sec: 32129.3). Total num frames: 2498691072. Throughput: 0: 4207.7. Samples: 374672268. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:38,976][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:42,259][626795] Updated weights for policy 0, policy_version 305022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:43,982][24592] Fps is (10 sec: 15554.3, 60 sec: 16658.4, 300 sec: 31656.5). Total num frames: 2498756608. Throughput: 0: 4201.5. Samples: 374684430. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:43,983][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:47,694][626795] Updated weights for policy 0, policy_version 305032 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:48,979][24592] Fps is (10 sec: 14744.8, 60 sec: 16656.9, 300 sec: 31268.5). Total num frames: 2498838528. Throughput: 0: 4117.5. Samples: 374706120. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:48,982][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:52,437][626795] Updated weights for policy 0, policy_version 305042 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:53,983][24592] Fps is (10 sec: 17202.5, 60 sec: 16656.1, 300 sec: 30962.3). Total num frames: 2498928640. Throughput: 0: 4151.2. Samples: 374732886. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:53,986][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:56,288][626795] Updated weights for policy 0, policy_version 305052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:12:58,282][626795] Updated weights for policy 0, policy_version 305062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:58,976][24592] Fps is (10 sec: 25395.3, 60 sec: 18161.4, 300 sec: 30907.4). Total num frames: 2499092480. Throughput: 0: 4310.0. Samples: 374750676. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:12:58,978][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:00,234][626795] Updated weights for policy 0, policy_version 305072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:02,351][626795] Updated weights for policy 0, policy_version 305082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:03,976][24592] Fps is (10 sec: 36889.7, 60 sec: 19936.6, 300 sec: 30935.2). Total num frames: 2499297280. Throughput: 0: 5103.8. Samples: 374812272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:03,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000305090_2499297280.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:04,126][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000304240_2492334080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:04,418][626795] Updated weights for policy 0, policy_version 305092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:06,496][626795] Updated weights for policy 0, policy_version 305102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:08,387][626795] Updated weights for policy 0, policy_version 305112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:08,975][24592] Fps is (10 sec: 40143.4, 60 sec: 21845.3, 300 sec: 30935.2). Total num frames: 2499493888. Throughput: 0: 5860.3. Samples: 374872446. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:08,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:10,529][626795] Updated weights for policy 0, policy_version 305122 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:12,546][626795] Updated weights for policy 0, policy_version 305132 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:13,976][24592] Fps is (10 sec: 40139.0, 60 sec: 24029.8, 300 sec: 30935.2). Total num frames: 2499698688. Throughput: 0: 6226.5. Samples: 374902302. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:13,978][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:14,601][626795] Updated weights for policy 0, policy_version 305142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:16,967][626795] Updated weights for policy 0, policy_version 305152 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:18,975][24592] Fps is (10 sec: 38501.8, 60 sec: 25535.6, 300 sec: 30935.2). Total num frames: 2499878912. Throughput: 0: 6930.9. Samples: 374958726. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:18,979][24592] Avg episode reward: [(0, '4.346')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:19,082][626795] Updated weights for policy 0, policy_version 305162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:21,059][626795] Updated weights for policy 0, policy_version 305172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:23,041][626795] Updated weights for policy 0, policy_version 305182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:23,975][24592] Fps is (10 sec: 38504.7, 60 sec: 27583.2, 300 sec: 30935.2). Total num frames: 2500083712. Throughput: 0: 7719.5. Samples: 375019644. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:23,977][24592] Avg episode reward: [(0, '4.499')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:25,212][626795] Updated weights for policy 0, policy_version 305192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:27,257][626795] Updated weights for policy 0, policy_version 305202 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:28,976][24592] Fps is (10 sec: 40140.3, 60 sec: 29354.7, 300 sec: 30935.2). Total num frames: 2500280320. Throughput: 0: 8082.9. Samples: 375048108. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:28,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:29,323][626795] Updated weights for policy 0, policy_version 305212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:31,283][626795] Updated weights for policy 0, policy_version 305222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:33,322][626795] Updated weights for policy 0, policy_version 305232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:33,976][24592] Fps is (10 sec: 40138.2, 60 sec: 31402.3, 300 sec: 31018.5). Total num frames: 2500485120. Throughput: 0: 8956.8. Samples: 375109176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:33,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:35,265][626795] Updated weights for policy 0, policy_version 305242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:37,375][626795] Updated weights for policy 0, policy_version 305252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:38,975][24592] Fps is (10 sec: 40960.8, 60 sec: 33314.2, 300 sec: 31018.5). Total num frames: 2500689920. Throughput: 0: 9722.1. Samples: 375170310. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:38,977][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:39,413][626795] Updated weights for policy 0, policy_version 305262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:41,360][626795] Updated weights for policy 0, policy_version 305272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:43,404][626795] Updated weights for policy 0, policy_version 305282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:43,978][24592] Fps is (10 sec: 40133.0, 60 sec: 35501.1, 300 sec: 31018.3). Total num frames: 2500886528. Throughput: 0: 9992.6. Samples: 375200364. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:43,979][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:45,496][626795] Updated weights for policy 0, policy_version 305292 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:47,559][626795] Updated weights for policy 0, policy_version 305302 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:48,975][24592] Fps is (10 sec: 37683.3, 60 sec: 37137.5, 300 sec: 30935.2). Total num frames: 2501066752. Throughput: 0: 9966.0. Samples: 375260742. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:48,978][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:50,478][626795] Updated weights for policy 0, policy_version 305312 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:52,854][626795] Updated weights for policy 0, policy_version 305322 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:53,975][24592] Fps is (10 sec: 35234.7, 60 sec: 38507.0, 300 sec: 30851.9). Total num frames: 2501238784. Throughput: 0: 9693.6. Samples: 375308658. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:53,976][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:54,946][626795] Updated weights for policy 0, policy_version 305332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:56,907][626795] Updated weights for policy 0, policy_version 305342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:13:58,856][626795] Updated weights for policy 0, policy_version 305352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:58,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39185.5, 300 sec: 30879.7). Total num frames: 2501443584. Throughput: 0: 9702.5. Samples: 375338910. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:13:58,976][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:00,910][626795] Updated weights for policy 0, policy_version 305362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:02,907][626795] Updated weights for policy 0, policy_version 305372 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:03,976][24592] Fps is (10 sec: 40958.4, 60 sec: 39184.9, 300 sec: 30935.2). Total num frames: 2501648384. Throughput: 0: 9809.8. Samples: 375400170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:03,979][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:04,883][626795] Updated weights for policy 0, policy_version 305382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:06,940][626795] Updated weights for policy 0, policy_version 305392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:08,843][626795] Updated weights for policy 0, policy_version 305402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:08,975][24592] Fps is (10 sec: 40959.6, 60 sec: 39321.5, 300 sec: 30963.0). Total num frames: 2501853184. Throughput: 0: 9819.3. Samples: 375461514. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:08,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:11,007][626795] Updated weights for policy 0, policy_version 305412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:13,022][626795] Updated weights for policy 0, policy_version 305422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:13,975][24592] Fps is (10 sec: 40141.7, 60 sec: 39185.4, 300 sec: 30991.2). Total num frames: 2502049792. Throughput: 0: 9848.4. Samples: 375491286. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:13,976][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:14,948][626795] Updated weights for policy 0, policy_version 305432 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:16,976][626795] Updated weights for policy 0, policy_version 305442 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:18,976][24592] Fps is (10 sec: 40140.2, 60 sec: 39594.6, 300 sec: 31018.5). Total num frames: 2502254592. Throughput: 0: 9848.7. Samples: 375552366. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:18,977][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:19,067][626795] Updated weights for policy 0, policy_version 305452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:21,470][626795] Updated weights for policy 0, policy_version 305462 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:23,497][626795] Updated weights for policy 0, policy_version 305472 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:23,976][24592] Fps is (10 sec: 39319.9, 60 sec: 39321.2, 300 sec: 31046.2). Total num frames: 2502443008. Throughput: 0: 9742.9. Samples: 375608748. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:23,978][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:25,606][626795] Updated weights for policy 0, policy_version 305482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:27,522][626795] Updated weights for policy 0, policy_version 305492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:28,975][24592] Fps is (10 sec: 38503.4, 60 sec: 39321.8, 300 sec: 31046.3). Total num frames: 2502639616. Throughput: 0: 9739.6. Samples: 375638622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:28,978][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:29,713][626795] Updated weights for policy 0, policy_version 305502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:31,677][626795] Updated weights for policy 0, policy_version 305512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:33,664][626795] Updated weights for policy 0, policy_version 305522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:33,975][24592] Fps is (10 sec: 40143.2, 60 sec: 39322.0, 300 sec: 31185.1). Total num frames: 2502844416. Throughput: 0: 9743.7. Samples: 375699210. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:33,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:35,759][626795] Updated weights for policy 0, policy_version 305532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:37,786][626795] Updated weights for policy 0, policy_version 305542 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:38,976][24592] Fps is (10 sec: 40959.1, 60 sec: 39321.5, 300 sec: 31351.8). Total num frames: 2503049216. Throughput: 0: 10039.3. Samples: 375760428. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:38,988][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:39,700][626795] Updated weights for policy 0, policy_version 305552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:41,769][626795] Updated weights for policy 0, policy_version 305562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:43,756][626795] Updated weights for policy 0, policy_version 305572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:43,976][24592] Fps is (10 sec: 40959.0, 60 sec: 39459.7, 300 sec: 31490.7). Total num frames: 2503254016. Throughput: 0: 10032.6. Samples: 375790380. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:43,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:45,953][626795] Updated weights for policy 0, policy_version 305582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:47,678][626795] Updated weights for policy 0, policy_version 305592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:48,975][24592] Fps is (10 sec: 40141.6, 60 sec: 39731.2, 300 sec: 31629.6). Total num frames: 2503450624. Throughput: 0: 10039.4. Samples: 375851940. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:48,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:49,827][626795] Updated weights for policy 0, policy_version 305602 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:51,754][626795] Updated weights for policy 0, policy_version 305612 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:53,975][24592] Fps is (10 sec: 39322.1, 60 sec: 40140.7, 300 sec: 31740.5). Total num frames: 2503647232. Throughput: 0: 9952.3. Samples: 375909366. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:53,977][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:54,419][626795] Updated weights for policy 0, policy_version 305622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:56,312][626795] Updated weights for policy 0, policy_version 305632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:14:58,299][626795] Updated weights for policy 0, policy_version 305642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:58,975][24592] Fps is (10 sec: 39321.7, 60 sec: 40004.3, 300 sec: 31907.2). Total num frames: 2503843840. Throughput: 0: 9939.9. Samples: 375938580. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:14:58,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:00,308][626795] Updated weights for policy 0, policy_version 305652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:01,195][626772] Signal inference workers to stop experience collection... (4900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:01,196][626772] Signal inference workers to resume experience collection... (4900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:01,206][626795] InferenceWorker_p0-w0: stopping experience collection (4900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:01,211][626795] InferenceWorker_p0-w0: resuming experience collection (4900 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:02,385][626795] Updated weights for policy 0, policy_version 305662 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:03,975][24592] Fps is (10 sec: 39321.9, 60 sec: 39868.0, 300 sec: 32046.0). Total num frames: 2504040448. Throughput: 0: 9936.5. Samples: 375999504. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:03,978][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000305669_2504040448.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:04,057][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000304741_2496438272.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:04,484][626795] Updated weights for policy 0, policy_version 305672 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:06,550][626795] Updated weights for policy 0, policy_version 305682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:08,429][626795] Updated weights for policy 0, policy_version 305692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:08,976][24592] Fps is (10 sec: 39318.3, 60 sec: 39730.7, 300 sec: 32157.0). Total num frames: 2504237056. Throughput: 0: 10012.6. Samples: 376059318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:08,977][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:10,470][626795] Updated weights for policy 0, policy_version 305702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:12,541][626795] Updated weights for policy 0, policy_version 305712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:13,976][24592] Fps is (10 sec: 40956.6, 60 sec: 40003.8, 300 sec: 32323.9). Total num frames: 2504450048. Throughput: 0: 10015.9. Samples: 376089348. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:13,978][24592] Avg episode reward: [(0, '4.942')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:14,598][626795] Updated weights for policy 0, policy_version 305722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:16,590][626795] Updated weights for policy 0, policy_version 305732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:18,573][626795] Updated weights for policy 0, policy_version 305742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:18,976][24592] Fps is (10 sec: 40961.2, 60 sec: 39867.6, 300 sec: 32379.2). Total num frames: 2504646656. Throughput: 0: 10024.0. Samples: 376150296. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:18,977][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:20,664][626795] Updated weights for policy 0, policy_version 305752 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:22,658][626795] Updated weights for policy 0, policy_version 305762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:23,977][24592] Fps is (10 sec: 40139.1, 60 sec: 40140.3, 300 sec: 32379.2). Total num frames: 2504851456. Throughput: 0: 10029.5. Samples: 376211766. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:23,981][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:24,563][626795] Updated weights for policy 0, policy_version 305772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:27,139][626795] Updated weights for policy 0, policy_version 305782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:28,976][24592] Fps is (10 sec: 39320.9, 60 sec: 40003.8, 300 sec: 32462.5). Total num frames: 2505039872. Throughput: 0: 9927.1. Samples: 376237104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:28,978][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:29,261][626795] Updated weights for policy 0, policy_version 305792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:31,171][626795] Updated weights for policy 0, policy_version 305802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:33,162][626795] Updated weights for policy 0, policy_version 305812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:33,975][24592] Fps is (10 sec: 39326.8, 60 sec: 40004.3, 300 sec: 32573.6). Total num frames: 2505244672. Throughput: 0: 9916.4. Samples: 376298178. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:33,976][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:35,217][626795] Updated weights for policy 0, policy_version 305822 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:37,359][626795] Updated weights for policy 0, policy_version 305832 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:38,975][24592] Fps is (10 sec: 40143.7, 60 sec: 39867.9, 300 sec: 32712.5). Total num frames: 2505441280. Throughput: 0: 9993.2. Samples: 376359060. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:38,976][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:39,389][626795] Updated weights for policy 0, policy_version 305842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:41,280][626795] Updated weights for policy 0, policy_version 305852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:43,281][626795] Updated weights for policy 0, policy_version 305862 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:43,975][24592] Fps is (10 sec: 40140.6, 60 sec: 39867.9, 300 sec: 32851.3). Total num frames: 2505646080. Throughput: 0: 10008.4. Samples: 376388958. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:43,978][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:45,325][626795] Updated weights for policy 0, policy_version 305872 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:47,338][626795] Updated weights for policy 0, policy_version 305882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:48,976][24592] Fps is (10 sec: 40139.8, 60 sec: 39867.6, 300 sec: 32962.4). Total num frames: 2505842688. Throughput: 0: 10010.0. Samples: 376449954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:48,977][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:49,400][626795] Updated weights for policy 0, policy_version 305892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:51,498][626795] Updated weights for policy 0, policy_version 305902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:53,512][626795] Updated weights for policy 0, policy_version 305912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:53,976][24592] Fps is (10 sec: 40138.6, 60 sec: 40004.0, 300 sec: 33129.1). Total num frames: 2506047488. Throughput: 0: 10015.5. Samples: 376510014. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:53,977][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:55,660][626795] Updated weights for policy 0, policy_version 305922 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:15:57,495][626795] Updated weights for policy 0, policy_version 305932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:59,116][24592] Fps is (10 sec: 37970.6, 60 sec: 39638.5, 300 sec: 33168.8). Total num frames: 2506227712. Throughput: 0: 9980.6. Samples: 376539864. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:15:59,117][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:00,096][626795] Updated weights for policy 0, policy_version 305942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:02,109][626795] Updated weights for policy 0, policy_version 305952 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:03,976][24592] Fps is (10 sec: 38501.0, 60 sec: 39867.1, 300 sec: 33378.8). Total num frames: 2506432512. Throughput: 0: 9903.4. Samples: 376595952. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:03,978][24592] Avg episode reward: [(0, '5.065')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:04,069][626795] Updated weights for policy 0, policy_version 305962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:06,164][626795] Updated weights for policy 0, policy_version 305972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:08,123][626795] Updated weights for policy 0, policy_version 305982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:08,975][24592] Fps is (10 sec: 41542.4, 60 sec: 40004.8, 300 sec: 33462.2). Total num frames: 2506637312. Throughput: 0: 9894.3. Samples: 376656996. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:08,977][24592] Avg episode reward: [(0, '5.040')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:10,216][626795] Updated weights for policy 0, policy_version 305992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:12,094][626795] Updated weights for policy 0, policy_version 306002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:13,975][24592] Fps is (10 sec: 40144.3, 60 sec: 39731.7, 300 sec: 33573.3). Total num frames: 2506833920. Throughput: 0: 10009.3. Samples: 376687518. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:13,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:14,211][626795] Updated weights for policy 0, policy_version 306012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:16,185][626795] Updated weights for policy 0, policy_version 306022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:18,248][626795] Updated weights for policy 0, policy_version 306032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:18,976][24592] Fps is (10 sec: 40140.5, 60 sec: 39868.0, 300 sec: 33712.2). Total num frames: 2507038720. Throughput: 0: 10002.1. Samples: 376748274. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:18,978][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:20,294][626795] Updated weights for policy 0, policy_version 306042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:22,340][626795] Updated weights for policy 0, policy_version 306052 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:23,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39868.6, 300 sec: 33878.8). Total num frames: 2507243520. Throughput: 0: 10014.4. Samples: 376809708. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:23,978][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:24,209][626795] Updated weights for policy 0, policy_version 306062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:26,280][626795] Updated weights for policy 0, policy_version 306072 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:28,184][626795] Updated weights for policy 0, policy_version 306082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:28,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40141.0, 300 sec: 33962.1). Total num frames: 2507448320. Throughput: 0: 10029.3. Samples: 376840278. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:28,977][24592] Avg episode reward: [(0, '5.008')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:30,306][626795] Updated weights for policy 0, policy_version 306092 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:32,829][626795] Updated weights for policy 0, policy_version 306102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:33,976][24592] Fps is (10 sec: 39320.5, 60 sec: 39867.5, 300 sec: 34017.6). Total num frames: 2507636736. Throughput: 0: 9918.6. Samples: 376896294. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:33,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:34,858][626795] Updated weights for policy 0, policy_version 306112 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:36,836][626795] Updated weights for policy 0, policy_version 306122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:38,879][626795] Updated weights for policy 0, policy_version 306132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:38,975][24592] Fps is (10 sec: 38503.4, 60 sec: 39867.7, 300 sec: 34157.8). Total num frames: 2507833344. Throughput: 0: 9928.6. Samples: 376956798. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:38,976][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:40,899][626795] Updated weights for policy 0, policy_version 306142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:42,941][626795] Updated weights for policy 0, policy_version 306152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:43,975][24592] Fps is (10 sec: 40142.0, 60 sec: 39867.7, 300 sec: 34573.0). Total num frames: 2508038144. Throughput: 0: 9973.1. Samples: 376987254. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:43,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:44,864][626795] Updated weights for policy 0, policy_version 306162 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:46,951][626795] Updated weights for policy 0, policy_version 306172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:48,888][626795] Updated weights for policy 0, policy_version 306182 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40004.4, 300 sec: 34962.2). Total num frames: 2508242944. Throughput: 0: 10062.6. Samples: 377048760. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:48,976][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:50,947][626795] Updated weights for policy 0, policy_version 306192 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:52,869][626795] Updated weights for policy 0, policy_version 306202 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:53,976][24592] Fps is (10 sec: 40958.3, 60 sec: 40004.4, 300 sec: 35407.1). Total num frames: 2508447744. Throughput: 0: 10074.2. Samples: 377110338. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:53,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:54,867][626795] Updated weights for policy 0, policy_version 306212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:57,080][626795] Updated weights for policy 0, policy_version 306222 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:58,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40234.8, 300 sec: 35712.6). Total num frames: 2508636160. Throughput: 0: 10062.3. Samples: 377140320. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:16:58,979][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:16:59,282][626795] Updated weights for policy 0, policy_version 306232 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:01,407][626795] Updated weights for policy 0, policy_version 306242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:03,392][626795] Updated weights for policy 0, policy_version 306252 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:03,975][24592] Fps is (10 sec: 39322.7, 60 sec: 40141.3, 300 sec: 36128.1). Total num frames: 2508840960. Throughput: 0: 10004.1. Samples: 377198460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:03,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000306255_2508840960.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:04,579][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000305090_2499297280.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:05,921][626795] Updated weights for policy 0, policy_version 306262 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:07,885][626795] Updated weights for policy 0, policy_version 306272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:08,976][24592] Fps is (10 sec: 38500.1, 60 sec: 39730.8, 300 sec: 36489.1). Total num frames: 2509021184. Throughput: 0: 9867.1. Samples: 377253732. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:08,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:09,919][626795] Updated weights for policy 0, policy_version 306282 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:11,891][626795] Updated weights for policy 0, policy_version 306292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:13,882][626795] Updated weights for policy 0, policy_version 306302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:13,975][24592] Fps is (10 sec: 38502.9, 60 sec: 39867.8, 300 sec: 36879.0). Total num frames: 2509225984. Throughput: 0: 9855.8. Samples: 377283786. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:13,978][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:15,914][626795] Updated weights for policy 0, policy_version 306312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:17,911][626795] Updated weights for policy 0, policy_version 306322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:18,976][24592] Fps is (10 sec: 40960.4, 60 sec: 39867.5, 300 sec: 37295.3). Total num frames: 2509430784. Throughput: 0: 9988.2. Samples: 377345766. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:18,977][24592] Avg episode reward: [(0, '4.471')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:19,963][626795] Updated weights for policy 0, policy_version 306332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:21,845][626795] Updated weights for policy 0, policy_version 306342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:23,910][626795] Updated weights for policy 0, policy_version 306352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39867.7, 300 sec: 37683.2). Total num frames: 2509635584. Throughput: 0: 10001.1. Samples: 377406846. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:23,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:25,875][626795] Updated weights for policy 0, policy_version 306362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:27,840][626795] Updated weights for policy 0, policy_version 306372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:28,976][24592] Fps is (10 sec: 40961.3, 60 sec: 39867.8, 300 sec: 38099.7). Total num frames: 2509840384. Throughput: 0: 10006.4. Samples: 377437542. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:28,978][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:29,878][626795] Updated weights for policy 0, policy_version 306382 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:32,006][626795] Updated weights for policy 0, policy_version 306392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:33,890][626795] Updated weights for policy 0, policy_version 306402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:33,984][24592] Fps is (10 sec: 40924.8, 60 sec: 40135.3, 300 sec: 38487.4). Total num frames: 2510045184. Throughput: 0: 9999.7. Samples: 377498832. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:33,988][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:36,011][626795] Updated weights for policy 0, policy_version 306412 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:38,389][626795] Updated weights for policy 0, policy_version 306422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:38,975][24592] Fps is (10 sec: 39322.3, 60 sec: 40004.3, 300 sec: 38905.9). Total num frames: 2510233600. Throughput: 0: 9905.3. Samples: 377556072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:38,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:40,465][626795] Updated weights for policy 0, policy_version 306432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:42,399][626795] Updated weights for policy 0, policy_version 306442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:43,976][24592] Fps is (10 sec: 39354.0, 60 sec: 40004.0, 300 sec: 39321.6). Total num frames: 2510438400. Throughput: 0: 9910.3. Samples: 377586288. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:43,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:44,416][626795] Updated weights for policy 0, policy_version 306452 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:46,322][626795] Updated weights for policy 0, policy_version 306462 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:48,359][626795] Updated weights for policy 0, policy_version 306472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:48,976][24592] Fps is (10 sec: 40958.3, 60 sec: 40004.0, 300 sec: 39711.3). Total num frames: 2510643200. Throughput: 0: 9998.1. Samples: 377648376. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:48,978][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:50,342][626795] Updated weights for policy 0, policy_version 306482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:52,377][626795] Updated weights for policy 0, policy_version 306492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:53,975][24592] Fps is (10 sec: 40961.6, 60 sec: 40004.5, 300 sec: 39849.3). Total num frames: 2510848000. Throughput: 0: 10129.7. Samples: 377709564. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:53,978][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:54,423][626795] Updated weights for policy 0, policy_version 306502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:56,343][626795] Updated weights for policy 0, policy_version 306512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:17:58,398][626795] Updated weights for policy 0, policy_version 306522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:58,975][24592] Fps is (10 sec: 40142.3, 60 sec: 40140.8, 300 sec: 39821.5). Total num frames: 2511044608. Throughput: 0: 10135.1. Samples: 377739864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:17:58,976][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:00,427][626795] Updated weights for policy 0, policy_version 306532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:02,426][626795] Updated weights for policy 0, policy_version 306542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:03,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40140.9, 300 sec: 39849.2). Total num frames: 2511249408. Throughput: 0: 10108.5. Samples: 377800644. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:03,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:04,541][626795] Updated weights for policy 0, policy_version 306552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:06,489][626795] Updated weights for policy 0, policy_version 306562 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:08,424][626795] Updated weights for policy 0, policy_version 306572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:08,976][24592] Fps is (10 sec: 40959.7, 60 sec: 40550.7, 300 sec: 39849.3). Total num frames: 2511454208. Throughput: 0: 10114.0. Samples: 377861976. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:08,978][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:11,018][626795] Updated weights for policy 0, policy_version 306582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:13,001][626795] Updated weights for policy 0, policy_version 306592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:13,975][24592] Fps is (10 sec: 38502.3, 60 sec: 40140.8, 300 sec: 39849.2). Total num frames: 2511634432. Throughput: 0: 9992.7. Samples: 377887212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:13,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:14,983][626795] Updated weights for policy 0, policy_version 306602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:16,987][626795] Updated weights for policy 0, policy_version 306612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:18,976][24592] Fps is (10 sec: 38502.3, 60 sec: 40141.0, 300 sec: 39849.2). Total num frames: 2511839232. Throughput: 0: 9997.5. Samples: 377948634. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:18,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:19,055][626795] Updated weights for policy 0, policy_version 306622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:20,939][626795] Updated weights for policy 0, policy_version 306632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:23,206][626795] Updated weights for policy 0, policy_version 306642 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:23,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40004.2, 300 sec: 39849.2). Total num frames: 2512035840. Throughput: 0: 10026.4. Samples: 378007260. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:23,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:25,261][626795] Updated weights for policy 0, policy_version 306652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:27,211][626795] Updated weights for policy 0, policy_version 306662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:28,975][24592] Fps is (10 sec: 40141.2, 60 sec: 40004.4, 300 sec: 39849.3). Total num frames: 2512240640. Throughput: 0: 10041.0. Samples: 378038130. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:28,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:29,229][626795] Updated weights for policy 0, policy_version 306672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:31,255][626795] Updated weights for policy 0, policy_version 306682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:33,546][626795] Updated weights for policy 0, policy_version 306692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:33,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39873.4, 300 sec: 39821.5). Total num frames: 2512437248. Throughput: 0: 10002.6. Samples: 378098490. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:33,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:35,548][626795] Updated weights for policy 0, policy_version 306702 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:37,536][626795] Updated weights for policy 0, policy_version 306712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:38,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40140.8, 300 sec: 39849.6). Total num frames: 2512642048. Throughput: 0: 9978.1. Samples: 378158580. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:38,977][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:39,500][626795] Updated weights for policy 0, policy_version 306722 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:41,554][626795] Updated weights for policy 0, policy_version 306732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:43,956][626795] Updated weights for policy 0, policy_version 306742 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:43,975][24592] Fps is (10 sec: 39321.6, 60 sec: 39868.0, 300 sec: 39877.0). Total num frames: 2512830464. Throughput: 0: 9989.6. Samples: 378189396. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:43,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:45,934][626795] Updated weights for policy 0, policy_version 306752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:47,882][626795] Updated weights for policy 0, policy_version 306762 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:48,977][24592] Fps is (10 sec: 39317.0, 60 sec: 39867.2, 300 sec: 39987.9). Total num frames: 2513035264. Throughput: 0: 9912.4. Samples: 378246714. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:48,978][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:50,147][626795] Updated weights for policy 0, policy_version 306772 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:52,608][626795] Updated weights for policy 0, policy_version 306782 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:53,975][24592] Fps is (10 sec: 38502.4, 60 sec: 39458.1, 300 sec: 39904.8). Total num frames: 2513215488. Throughput: 0: 9759.1. Samples: 378301134. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:53,976][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:54,602][626795] Updated weights for policy 0, policy_version 306792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:56,813][626795] Updated weights for policy 0, policy_version 306802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:18:58,843][626795] Updated weights for policy 0, policy_version 306812 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:58,975][24592] Fps is (10 sec: 36868.5, 60 sec: 39321.6, 300 sec: 39849.3). Total num frames: 2513403904. Throughput: 0: 9831.9. Samples: 378329646. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:18:58,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:00,713][626795] Updated weights for policy 0, policy_version 306822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:02,777][626795] Updated weights for policy 0, policy_version 306832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:03,975][24592] Fps is (10 sec: 40140.5, 60 sec: 39458.0, 300 sec: 39877.0). Total num frames: 2513616896. Throughput: 0: 9846.7. Samples: 378391734. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:03,978][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000306838_2513616896.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:04,134][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000305669_2504040448.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:04,863][626795] Updated weights for policy 0, policy_version 306842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:06,921][626795] Updated weights for policy 0, policy_version 306852 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:08,842][626795] Updated weights for policy 0, policy_version 306862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:08,975][24592] Fps is (10 sec: 40959.3, 60 sec: 39321.6, 300 sec: 39877.0). Total num frames: 2513813504. Throughput: 0: 9894.9. Samples: 378452532. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:08,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:11,137][626795] Updated weights for policy 0, policy_version 306872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:13,395][626795] Updated weights for policy 0, policy_version 306882 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:13,975][24592] Fps is (10 sec: 37683.1, 60 sec: 39321.5, 300 sec: 39793.7). Total num frames: 2513993728. Throughput: 0: 9817.4. Samples: 378479916. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:13,977][24592] Avg episode reward: [(0, '5.026')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:16,136][626795] Updated weights for policy 0, policy_version 306892 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:18,088][626795] Updated weights for policy 0, policy_version 306902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:18,976][24592] Fps is (10 sec: 35224.2, 60 sec: 38775.2, 300 sec: 39738.1). Total num frames: 2514165760. Throughput: 0: 9608.3. Samples: 378530868. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:18,982][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:20,389][626795] Updated weights for policy 0, policy_version 306912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:22,455][626795] Updated weights for policy 0, policy_version 306922 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:23,975][24592] Fps is (10 sec: 36864.5, 60 sec: 38775.5, 300 sec: 39738.1). Total num frames: 2514362368. Throughput: 0: 9576.4. Samples: 378589518. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:23,978][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:24,576][626795] Updated weights for policy 0, policy_version 306932 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:26,663][626795] Updated weights for policy 0, policy_version 306942 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:28,703][626795] Updated weights for policy 0, policy_version 306952 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:28,976][24592] Fps is (10 sec: 39323.6, 60 sec: 38638.9, 300 sec: 39710.4). Total num frames: 2514558976. Throughput: 0: 9515.9. Samples: 378617610. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:28,978][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:30,827][626795] Updated weights for policy 0, policy_version 306962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:32,984][626795] Updated weights for policy 0, policy_version 306972 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:33,975][24592] Fps is (10 sec: 38502.3, 60 sec: 38502.4, 300 sec: 39654.9). Total num frames: 2514747392. Throughput: 0: 9554.3. Samples: 378676644. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:33,977][24592] Avg episode reward: [(0, '4.914')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:35,131][626795] Updated weights for policy 0, policy_version 306982 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:37,252][626795] Updated weights for policy 0, policy_version 306992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:38,975][24592] Fps is (10 sec: 38502.4, 60 sec: 38365.9, 300 sec: 39627.1). Total num frames: 2514944000. Throughput: 0: 9622.1. Samples: 378734130. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:38,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:39,503][626795] Updated weights for policy 0, policy_version 307002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:41,643][626795] Updated weights for policy 0, policy_version 307012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:43,821][626795] Updated weights for policy 0, policy_version 307022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:43,976][24592] Fps is (10 sec: 37682.3, 60 sec: 38229.2, 300 sec: 39571.5). Total num frames: 2515124224. Throughput: 0: 9605.7. Samples: 378761904. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:43,977][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:45,780][626795] Updated weights for policy 0, policy_version 307032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:48,229][626795] Updated weights for policy 0, policy_version 307042 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:48,975][24592] Fps is (10 sec: 36864.1, 60 sec: 37957.0, 300 sec: 39543.8). Total num frames: 2515312640. Throughput: 0: 9545.6. Samples: 378821286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:48,976][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:50,479][626795] Updated weights for policy 0, policy_version 307052 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:52,753][626795] Updated weights for policy 0, policy_version 307062 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:53,978][24592] Fps is (10 sec: 37675.2, 60 sec: 38091.3, 300 sec: 39515.7). Total num frames: 2515501056. Throughput: 0: 9343.5. Samples: 378873012. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:53,979][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:54,777][626795] Updated weights for policy 0, policy_version 307072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:56,837][626795] Updated weights for policy 0, policy_version 307082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:19:58,921][626795] Updated weights for policy 0, policy_version 307092 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:58,975][24592] Fps is (10 sec: 38502.4, 60 sec: 38229.3, 300 sec: 39516.0). Total num frames: 2515697664. Throughput: 0: 9392.0. Samples: 378902556. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:19:58,977][24592] Avg episode reward: [(0, '4.492')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:01,059][626795] Updated weights for policy 0, policy_version 307102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:03,111][626795] Updated weights for policy 0, policy_version 307112 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:03,975][24592] Fps is (10 sec: 39330.9, 60 sec: 37956.3, 300 sec: 39516.1). Total num frames: 2515894272. Throughput: 0: 9571.3. Samples: 378961572. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:03,976][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:05,105][626795] Updated weights for policy 0, policy_version 307122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:07,110][626795] Updated weights for policy 0, policy_version 307132 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:08,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38092.9, 300 sec: 39488.3). Total num frames: 2516099072. Throughput: 0: 9639.7. Samples: 379023306. Policy #0 lag: (min: 0.0, avg: 2.4, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:08,976][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:09,065][626795] Updated weights for policy 0, policy_version 307142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:11,245][626795] Updated weights for policy 0, policy_version 307152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:13,271][626795] Updated weights for policy 0, policy_version 307162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:13,976][24592] Fps is (10 sec: 40138.6, 60 sec: 38365.6, 300 sec: 39488.2). Total num frames: 2516295680. Throughput: 0: 9666.7. Samples: 379052616. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:13,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:15,231][626795] Updated weights for policy 0, policy_version 307172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:17,449][626795] Updated weights for policy 0, policy_version 307182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:18,975][24592] Fps is (10 sec: 38502.5, 60 sec: 38639.3, 300 sec: 39432.8). Total num frames: 2516484096. Throughput: 0: 9662.1. Samples: 379111440. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:18,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:19,563][626795] Updated weights for policy 0, policy_version 307192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:22,227][626795] Updated weights for policy 0, policy_version 307202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:23,975][24592] Fps is (10 sec: 36046.7, 60 sec: 38229.3, 300 sec: 39377.2). Total num frames: 2516656128. Throughput: 0: 9544.4. Samples: 379163628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:23,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:24,521][626795] Updated weights for policy 0, policy_version 307212 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:27,677][626795] Updated weights for policy 0, policy_version 307222 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:28,976][24592] Fps is (10 sec: 32767.2, 60 sec: 37546.5, 300 sec: 39210.5). Total num frames: 2516811776. Throughput: 0: 9436.4. Samples: 379186542. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:28,978][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:30,003][626795] Updated weights for policy 0, policy_version 307232 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:32,361][626795] Updated weights for policy 0, policy_version 307242 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:33,975][24592] Fps is (10 sec: 32768.1, 60 sec: 37273.6, 300 sec: 39127.2). Total num frames: 2516983808. Throughput: 0: 9195.7. Samples: 379235094. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:33,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:34,703][626795] Updated weights for policy 0, policy_version 307252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:36,760][626795] Updated weights for policy 0, policy_version 307262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:38,850][626795] Updated weights for policy 0, policy_version 307272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:38,975][24592] Fps is (10 sec: 36865.1, 60 sec: 37273.6, 300 sec: 39099.4). Total num frames: 2517180416. Throughput: 0: 9313.4. Samples: 379292094. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:38,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:40,967][626795] Updated weights for policy 0, policy_version 307282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:43,063][626795] Updated weights for policy 0, policy_version 307292 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:43,975][24592] Fps is (10 sec: 38502.3, 60 sec: 37410.3, 300 sec: 39071.7). Total num frames: 2517368832. Throughput: 0: 9304.1. Samples: 379321242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:43,978][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:45,222][626795] Updated weights for policy 0, policy_version 307302 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:47,209][626795] Updated weights for policy 0, policy_version 307312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:48,975][24592] Fps is (10 sec: 37683.1, 60 sec: 37410.1, 300 sec: 39016.2). Total num frames: 2517557248. Throughput: 0: 9289.2. Samples: 379379586. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:48,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:49,307][626795] Updated weights for policy 0, policy_version 307322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:51,417][626795] Updated weights for policy 0, policy_version 307332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:53,975][24592] Fps is (10 sec: 36045.0, 60 sec: 37138.5, 300 sec: 39006.9). Total num frames: 2517729280. Throughput: 0: 9141.1. Samples: 379434654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:53,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:54,144][626795] Updated weights for policy 0, policy_version 307342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:56,301][626795] Updated weights for policy 0, policy_version 307352 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:20:58,528][626795] Updated weights for policy 0, policy_version 307362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:58,975][24592] Fps is (10 sec: 36044.7, 60 sec: 37000.5, 300 sec: 38932.9). Total num frames: 2517917696. Throughput: 0: 9061.7. Samples: 379460388. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:20:58,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:01,322][626795] Updated weights for policy 0, policy_version 307372 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:03,581][626795] Updated weights for policy 0, policy_version 307382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:03,976][24592] Fps is (10 sec: 35221.6, 60 sec: 36453.7, 300 sec: 38793.8). Total num frames: 2518081536. Throughput: 0: 8848.5. Samples: 379509630. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:03,979][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000307383_2518081536.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:04,143][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000306255_2508840960.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:05,982][626795] Updated weights for policy 0, policy_version 307392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:08,011][626795] Updated weights for policy 0, policy_version 307402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:08,975][24592] Fps is (10 sec: 35225.6, 60 sec: 36181.3, 300 sec: 38766.2). Total num frames: 2518269952. Throughput: 0: 8933.3. Samples: 379565628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:08,977][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:10,322][626795] Updated weights for policy 0, policy_version 307412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:12,492][626795] Updated weights for policy 0, policy_version 307422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:13,975][24592] Fps is (10 sec: 36868.1, 60 sec: 35908.6, 300 sec: 38682.9). Total num frames: 2518450176. Throughput: 0: 9027.5. Samples: 379592778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:13,977][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:14,738][626795] Updated weights for policy 0, policy_version 307432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:16,816][626795] Updated weights for policy 0, policy_version 307442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:18,810][626795] Updated weights for policy 0, policy_version 307452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:18,975][24592] Fps is (10 sec: 38502.6, 60 sec: 36181.4, 300 sec: 38682.9). Total num frames: 2518654976. Throughput: 0: 9226.0. Samples: 379650264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:18,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:20,875][626795] Updated weights for policy 0, policy_version 307462 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:22,942][626795] Updated weights for policy 0, policy_version 307472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:23,976][24592] Fps is (10 sec: 40139.2, 60 sec: 36590.7, 300 sec: 38655.1). Total num frames: 2518851584. Throughput: 0: 9317.0. Samples: 379711362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:23,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:24,808][626795] Updated weights for policy 0, policy_version 307482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:27,478][626795] Updated weights for policy 0, policy_version 307492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:28,976][24592] Fps is (10 sec: 37680.9, 60 sec: 37000.3, 300 sec: 38627.3). Total num frames: 2519031808. Throughput: 0: 9223.9. Samples: 379736322. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:28,978][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:29,495][626795] Updated weights for policy 0, policy_version 307502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:31,511][626795] Updated weights for policy 0, policy_version 307512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:33,610][626795] Updated weights for policy 0, policy_version 307522 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:33,975][24592] Fps is (10 sec: 38503.8, 60 sec: 37546.7, 300 sec: 38655.1). Total num frames: 2519236608. Throughput: 0: 9257.1. Samples: 379796154. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:33,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:35,534][626795] Updated weights for policy 0, policy_version 307532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:37,664][626795] Updated weights for policy 0, policy_version 307542 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:38,975][24592] Fps is (10 sec: 40962.5, 60 sec: 37683.2, 300 sec: 38655.1). Total num frames: 2519441408. Throughput: 0: 9397.1. Samples: 379857522. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:38,977][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:39,552][626795] Updated weights for policy 0, policy_version 307552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:41,624][626795] Updated weights for policy 0, policy_version 307562 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:43,615][626795] Updated weights for policy 0, policy_version 307572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:43,975][24592] Fps is (10 sec: 40141.0, 60 sec: 37819.8, 300 sec: 38627.4). Total num frames: 2519638016. Throughput: 0: 9501.6. Samples: 379887960. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:43,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:45,617][626795] Updated weights for policy 0, policy_version 307582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:47,483][626795] Updated weights for policy 0, policy_version 307592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:48,975][24592] Fps is (10 sec: 40959.7, 60 sec: 38229.3, 300 sec: 38655.2). Total num frames: 2519851008. Throughput: 0: 9809.4. Samples: 379951044. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:48,977][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:49,471][626795] Updated weights for policy 0, policy_version 307602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:51,691][626795] Updated weights for policy 0, policy_version 307612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:53,689][626795] Updated weights for policy 0, policy_version 307622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:53,975][24592] Fps is (10 sec: 40140.6, 60 sec: 38502.4, 300 sec: 38655.1). Total num frames: 2520039424. Throughput: 0: 9858.5. Samples: 380009262. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:53,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:55,925][626795] Updated weights for policy 0, policy_version 307632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:21:57,918][626795] Updated weights for policy 0, policy_version 307642 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:59,383][24592] Fps is (10 sec: 36995.3, 60 sec: 38378.4, 300 sec: 38574.1). Total num frames: 2520236032. Throughput: 0: 9822.4. Samples: 380038788. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:21:59,386][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:00,480][626795] Updated weights for policy 0, policy_version 307652 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:02,492][626795] Updated weights for policy 0, policy_version 307662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:03,976][24592] Fps is (10 sec: 38502.0, 60 sec: 39049.2, 300 sec: 38655.2). Total num frames: 2520424448. Throughput: 0: 9878.9. Samples: 380094816. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:03,976][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:04,422][626795] Updated weights for policy 0, policy_version 307672 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:06,526][626795] Updated weights for policy 0, policy_version 307682 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:08,458][626795] Updated weights for policy 0, policy_version 307692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:08,975][24592] Fps is (10 sec: 40991.6, 60 sec: 39321.6, 300 sec: 38655.1). Total num frames: 2520629248. Throughput: 0: 9874.1. Samples: 380155692. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:08,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:09,868][626772] Signal inference workers to stop experience collection... (4950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:09,874][626772] Signal inference workers to resume experience collection... (4950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:09,881][626795] InferenceWorker_p0-w0: stopping experience collection (4950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:09,887][626795] InferenceWorker_p0-w0: resuming experience collection (4950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:10,549][626795] Updated weights for policy 0, policy_version 307702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:12,338][626795] Updated weights for policy 0, policy_version 307712 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:13,975][24592] Fps is (10 sec: 40960.5, 60 sec: 39731.2, 300 sec: 38655.2). Total num frames: 2520834048. Throughput: 0: 10018.1. Samples: 380187132. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:13,976][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:14,419][626795] Updated weights for policy 0, policy_version 307722 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:16,501][626795] Updated weights for policy 0, policy_version 307732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:18,417][626795] Updated weights for policy 0, policy_version 307742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:18,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39731.2, 300 sec: 38655.1). Total num frames: 2521038848. Throughput: 0: 10046.8. Samples: 380248260. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:18,977][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:20,397][626795] Updated weights for policy 0, policy_version 307752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:22,546][626795] Updated weights for policy 0, policy_version 307762 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:23,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39868.0, 300 sec: 38655.2). Total num frames: 2521243648. Throughput: 0: 10038.5. Samples: 380309256. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:23,978][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:24,457][626795] Updated weights for policy 0, policy_version 307772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:26,460][626795] Updated weights for policy 0, policy_version 307782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:28,593][626795] Updated weights for policy 0, policy_version 307792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:28,976][24592] Fps is (10 sec: 40958.6, 60 sec: 40277.5, 300 sec: 38656.2). Total num frames: 2521448448. Throughput: 0: 10038.7. Samples: 380339706. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:28,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:30,480][626795] Updated weights for policy 0, policy_version 307802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:33,166][626795] Updated weights for policy 0, policy_version 307812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:33,978][24592] Fps is (10 sec: 38492.8, 60 sec: 39866.1, 300 sec: 38627.0). Total num frames: 2521628672. Throughput: 0: 9855.1. Samples: 380394546. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:33,979][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:35,168][626795] Updated weights for policy 0, policy_version 307822 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:37,175][626795] Updated weights for policy 0, policy_version 307832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:38,976][24592] Fps is (10 sec: 38503.1, 60 sec: 39867.6, 300 sec: 38627.4). Total num frames: 2521833472. Throughput: 0: 9913.4. Samples: 380455368. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:38,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:39,370][626795] Updated weights for policy 0, policy_version 307842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:41,213][626795] Updated weights for policy 0, policy_version 307852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:43,207][626795] Updated weights for policy 0, policy_version 307862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:43,976][24592] Fps is (10 sec: 40149.9, 60 sec: 39867.6, 300 sec: 38599.6). Total num frames: 2522030080. Throughput: 0: 10016.8. Samples: 380485464. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:43,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:45,297][626795] Updated weights for policy 0, policy_version 307872 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:47,352][626795] Updated weights for policy 0, policy_version 307882 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:48,976][24592] Fps is (10 sec: 40140.2, 60 sec: 39731.0, 300 sec: 38599.5). Total num frames: 2522234880. Throughput: 0: 10041.0. Samples: 380546664. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:48,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:49,320][626795] Updated weights for policy 0, policy_version 307892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:51,162][626795] Updated weights for policy 0, policy_version 307902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:53,172][626795] Updated weights for policy 0, policy_version 307912 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:53,975][24592] Fps is (10 sec: 40960.9, 60 sec: 40004.3, 300 sec: 38627.4). Total num frames: 2522439680. Throughput: 0: 10080.3. Samples: 380609304. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:53,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:55,280][626795] Updated weights for policy 0, policy_version 307922 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:57,173][626795] Updated weights for policy 0, policy_version 307932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:58,975][24592] Fps is (10 sec: 40961.3, 60 sec: 40415.2, 300 sec: 38627.4). Total num frames: 2522644480. Throughput: 0: 10058.9. Samples: 380639784. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:22:58,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:22:59,288][626795] Updated weights for policy 0, policy_version 307942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:01,304][626795] Updated weights for policy 0, policy_version 307952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:03,237][626795] Updated weights for policy 0, policy_version 307962 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40550.5, 300 sec: 38655.1). Total num frames: 2522857472. Throughput: 0: 10056.4. Samples: 380700798. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:03,976][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000307966_2522857472.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:04,099][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000306838_2513616896.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:05,836][626795] Updated weights for policy 0, policy_version 307972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:07,820][626795] Updated weights for policy 0, policy_version 307982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:08,976][24592] Fps is (10 sec: 39321.1, 60 sec: 40140.7, 300 sec: 38655.1). Total num frames: 2523037696. Throughput: 0: 9946.6. Samples: 380756856. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:08,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:09,763][626795] Updated weights for policy 0, policy_version 307992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:11,841][626795] Updated weights for policy 0, policy_version 308002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:13,720][626795] Updated weights for policy 0, policy_version 308012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:13,975][24592] Fps is (10 sec: 37682.8, 60 sec: 40004.2, 300 sec: 38627.4). Total num frames: 2523234304. Throughput: 0: 9941.1. Samples: 380787054. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:13,977][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:15,755][626795] Updated weights for policy 0, policy_version 308022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:17,637][626795] Updated weights for policy 0, policy_version 308032 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:18,975][24592] Fps is (10 sec: 40960.5, 60 sec: 40140.8, 300 sec: 38682.9). Total num frames: 2523447296. Throughput: 0: 10127.9. Samples: 380850276. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:18,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:19,732][626795] Updated weights for policy 0, policy_version 308042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:21,636][626795] Updated weights for policy 0, policy_version 308052 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:23,717][626795] Updated weights for policy 0, policy_version 308062 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:23,977][24592] Fps is (10 sec: 41773.2, 60 sec: 40139.8, 300 sec: 38682.7). Total num frames: 2523652096. Throughput: 0: 10140.1. Samples: 380911686. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:23,981][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:25,789][626795] Updated weights for policy 0, policy_version 308072 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:28,025][626795] Updated weights for policy 0, policy_version 308082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:28,976][24592] Fps is (10 sec: 39320.9, 60 sec: 39867.8, 300 sec: 38655.1). Total num frames: 2523840512. Throughput: 0: 10128.3. Samples: 380941236. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:28,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:30,171][626795] Updated weights for policy 0, policy_version 308092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:32,148][626795] Updated weights for policy 0, policy_version 308102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:33,978][24592] Fps is (10 sec: 39319.0, 60 sec: 40277.5, 300 sec: 38654.8). Total num frames: 2524045312. Throughput: 0: 10066.1. Samples: 380999658. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:33,980][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:34,080][626795] Updated weights for policy 0, policy_version 308112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:36,061][626795] Updated weights for policy 0, policy_version 308122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:38,693][626795] Updated weights for policy 0, policy_version 308132 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:38,975][24592] Fps is (10 sec: 38502.9, 60 sec: 39867.8, 300 sec: 38627.4). Total num frames: 2524225536. Throughput: 0: 9901.5. Samples: 381054870. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:38,976][24592] Avg episode reward: [(0, '4.916')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:40,800][626795] Updated weights for policy 0, policy_version 308142 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:43,007][626795] Updated weights for policy 0, policy_version 308152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:43,976][24592] Fps is (10 sec: 36049.0, 60 sec: 39594.1, 300 sec: 38544.1). Total num frames: 2524405760. Throughput: 0: 9873.6. Samples: 381084108. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:43,978][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:45,569][626795] Updated weights for policy 0, policy_version 308162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:47,936][626795] Updated weights for policy 0, policy_version 308172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:48,975][24592] Fps is (10 sec: 36045.1, 60 sec: 39185.3, 300 sec: 38544.1). Total num frames: 2524585984. Throughput: 0: 9640.7. Samples: 381134628. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:48,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:50,083][626795] Updated weights for policy 0, policy_version 308182 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:52,811][626795] Updated weights for policy 0, policy_version 308192 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:53,975][24592] Fps is (10 sec: 33590.4, 60 sec: 38365.8, 300 sec: 38433.0). Total num frames: 2524741632. Throughput: 0: 9508.9. Samples: 381184758. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:53,978][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:55,243][626795] Updated weights for policy 0, policy_version 308202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:57,399][626795] Updated weights for policy 0, policy_version 308212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:58,975][24592] Fps is (10 sec: 34406.3, 60 sec: 38092.8, 300 sec: 38349.7). Total num frames: 2524930048. Throughput: 0: 9441.0. Samples: 381211896. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:23:58,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:23:59,434][626795] Updated weights for policy 0, policy_version 308222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:01,417][626795] Updated weights for policy 0, policy_version 308232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:03,361][626795] Updated weights for policy 0, policy_version 308242 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:03,975][24592] Fps is (10 sec: 38502.5, 60 sec: 37819.7, 300 sec: 38349.7). Total num frames: 2525126656. Throughput: 0: 9378.4. Samples: 381272304. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:03,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:05,613][626795] Updated weights for policy 0, policy_version 308252 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:07,555][626795] Updated weights for policy 0, policy_version 308262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:08,977][24592] Fps is (10 sec: 40952.5, 60 sec: 38364.8, 300 sec: 38460.5). Total num frames: 2525339648. Throughput: 0: 9349.5. Samples: 381332418. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:08,979][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:09,612][626795] Updated weights for policy 0, policy_version 308272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:12,097][626795] Updated weights for policy 0, policy_version 308282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:13,978][24592] Fps is (10 sec: 39310.4, 60 sec: 38091.0, 300 sec: 38488.2). Total num frames: 2525519872. Throughput: 0: 9251.6. Samples: 381357582. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:13,994][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:14,234][626795] Updated weights for policy 0, policy_version 308292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:16,177][626795] Updated weights for policy 0, policy_version 308302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:18,142][626795] Updated weights for policy 0, policy_version 308312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:18,975][24592] Fps is (10 sec: 37690.2, 60 sec: 37819.8, 300 sec: 38488.5). Total num frames: 2525716480. Throughput: 0: 9305.4. Samples: 381418380. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:18,978][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:20,200][626795] Updated weights for policy 0, policy_version 308322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:22,284][626795] Updated weights for policy 0, policy_version 308332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:23,976][24592] Fps is (10 sec: 40150.8, 60 sec: 37820.5, 300 sec: 38516.2). Total num frames: 2525921280. Throughput: 0: 9398.3. Samples: 381477798. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:23,977][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:24,397][626795] Updated weights for policy 0, policy_version 308342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:26,316][626795] Updated weights for policy 0, policy_version 308352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:28,252][626795] Updated weights for policy 0, policy_version 308362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:28,976][24592] Fps is (10 sec: 40958.3, 60 sec: 38092.7, 300 sec: 38571.8). Total num frames: 2526126080. Throughput: 0: 9433.2. Samples: 381508596. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:28,977][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:30,208][626795] Updated weights for policy 0, policy_version 308372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:32,343][626795] Updated weights for policy 0, policy_version 308382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:33,975][24592] Fps is (10 sec: 40961.4, 60 sec: 38094.2, 300 sec: 38599.6). Total num frames: 2526330880. Throughput: 0: 9674.9. Samples: 381570000. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:33,976][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:34,384][626795] Updated weights for policy 0, policy_version 308392 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:36,344][626795] Updated weights for policy 0, policy_version 308402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:38,187][626795] Updated weights for policy 0, policy_version 308412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:38,975][24592] Fps is (10 sec: 40961.5, 60 sec: 38502.4, 300 sec: 38682.9). Total num frames: 2526535680. Throughput: 0: 9950.4. Samples: 381632526. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:38,979][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:40,199][626795] Updated weights for policy 0, policy_version 308422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:42,202][626795] Updated weights for policy 0, policy_version 308432 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:43,975][24592] Fps is (10 sec: 39321.7, 60 sec: 38639.6, 300 sec: 38682.9). Total num frames: 2526724096. Throughput: 0: 10039.5. Samples: 381663672. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:43,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:44,703][626795] Updated weights for policy 0, policy_version 308442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:46,784][626795] Updated weights for policy 0, policy_version 308452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:48,811][626795] Updated weights for policy 0, policy_version 308462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:48,975][24592] Fps is (10 sec: 38502.5, 60 sec: 38912.0, 300 sec: 38711.0). Total num frames: 2526920704. Throughput: 0: 9933.7. Samples: 381719322. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:48,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:50,998][626795] Updated weights for policy 0, policy_version 308472 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:52,912][626795] Updated weights for policy 0, policy_version 308482 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:53,975][24592] Fps is (10 sec: 39321.6, 60 sec: 39594.7, 300 sec: 38710.7). Total num frames: 2527117312. Throughput: 0: 9912.5. Samples: 381778464. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:53,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:55,091][626795] Updated weights for policy 0, policy_version 308492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:57,234][626795] Updated weights for policy 0, policy_version 308502 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:58,975][24592] Fps is (10 sec: 40141.0, 60 sec: 39867.8, 300 sec: 38738.4). Total num frames: 2527322112. Throughput: 0: 10006.6. Samples: 381807852. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:24:58,976][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:24:59,104][626795] Updated weights for policy 0, policy_version 308512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:01,282][626795] Updated weights for policy 0, policy_version 308522 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:03,321][626795] Updated weights for policy 0, policy_version 308532 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:03,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39867.7, 300 sec: 38710.7). Total num frames: 2527518720. Throughput: 0: 9969.7. Samples: 381867018. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:03,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000308535_2527518720.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:04,137][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000307383_2518081536.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:05,354][626795] Updated weights for policy 0, policy_version 308542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:07,438][626795] Updated weights for policy 0, policy_version 308552 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:08,975][24592] Fps is (10 sec: 39321.5, 60 sec: 39595.9, 300 sec: 38710.7). Total num frames: 2527715328. Throughput: 0: 10013.2. Samples: 381928386. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:08,977][24592] Avg episode reward: [(0, '4.882')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:09,262][626795] Updated weights for policy 0, policy_version 308562 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:11,298][626795] Updated weights for policy 0, policy_version 308572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:13,252][626795] Updated weights for policy 0, policy_version 308582 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40142.7, 300 sec: 38794.0). Total num frames: 2527928320. Throughput: 0: 10025.4. Samples: 381959736. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:13,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:15,223][626795] Updated weights for policy 0, policy_version 308592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:17,577][626795] Updated weights for policy 0, policy_version 308602 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:18,976][24592] Fps is (10 sec: 40139.4, 60 sec: 40004.0, 300 sec: 38849.5). Total num frames: 2528116736. Throughput: 0: 9952.7. Samples: 382017876. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:18,977][24592] Avg episode reward: [(0, '4.384')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:19,650][626795] Updated weights for policy 0, policy_version 308612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:21,516][626795] Updated weights for policy 0, policy_version 308622 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:23,461][626795] Updated weights for policy 0, policy_version 308632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:23,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40141.0, 300 sec: 39043.9). Total num frames: 2528329728. Throughput: 0: 9961.1. Samples: 382080774. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:23,976][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:25,423][626795] Updated weights for policy 0, policy_version 308642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:27,374][626795] Updated weights for policy 0, policy_version 308652 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:28,975][24592] Fps is (10 sec: 42599.4, 60 sec: 40277.5, 300 sec: 39182.7). Total num frames: 2528542720. Throughput: 0: 9974.1. Samples: 382112508. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:28,978][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:29,266][626795] Updated weights for policy 0, policy_version 308662 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:31,344][626795] Updated weights for policy 0, policy_version 308672 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:33,252][626795] Updated weights for policy 0, policy_version 308682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:33,975][24592] Fps is (10 sec: 41778.7, 60 sec: 40277.3, 300 sec: 39210.5). Total num frames: 2528747520. Throughput: 0: 10128.1. Samples: 382175088. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:33,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:35,264][626795] Updated weights for policy 0, policy_version 308692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:37,125][626795] Updated weights for policy 0, policy_version 308702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40413.9, 300 sec: 39293.8). Total num frames: 2528960512. Throughput: 0: 10212.9. Samples: 382238046. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:38,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:39,159][626795] Updated weights for policy 0, policy_version 308712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:41,179][626795] Updated weights for policy 0, policy_version 308722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:43,035][626795] Updated weights for policy 0, policy_version 308732 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:43,975][24592] Fps is (10 sec: 41779.7, 60 sec: 40686.9, 300 sec: 39349.4). Total num frames: 2529165312. Throughput: 0: 10242.3. Samples: 382268754. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:43,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:44,996][626795] Updated weights for policy 0, policy_version 308742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:46,961][626795] Updated weights for policy 0, policy_version 308752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:48,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40550.4, 300 sec: 39404.9). Total num frames: 2529353728. Throughput: 0: 10336.7. Samples: 382332168. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:48,978][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:49,541][626795] Updated weights for policy 0, policy_version 308762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:51,472][626795] Updated weights for policy 0, policy_version 308772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:53,460][626795] Updated weights for policy 0, policy_version 308782 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:53,976][24592] Fps is (10 sec: 38501.1, 60 sec: 40550.2, 300 sec: 39432.6). Total num frames: 2529550336. Throughput: 0: 10210.6. Samples: 382387866. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:53,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:55,518][626795] Updated weights for policy 0, policy_version 308792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:57,427][626795] Updated weights for policy 0, policy_version 308802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:58,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40823.4, 300 sec: 39627.2). Total num frames: 2529771520. Throughput: 0: 10193.3. Samples: 382418436. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:25:58,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:25:59,510][626795] Updated weights for policy 0, policy_version 308812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:01,350][626795] Updated weights for policy 0, policy_version 308822 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:03,313][626795] Updated weights for policy 0, policy_version 308832 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:03,976][24592] Fps is (10 sec: 42599.4, 60 sec: 40959.9, 300 sec: 39682.6). Total num frames: 2529976320. Throughput: 0: 10301.5. Samples: 382481442. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:03,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:05,311][626795] Updated weights for policy 0, policy_version 308842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:07,228][626795] Updated weights for policy 0, policy_version 308852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41233.1, 300 sec: 39793.7). Total num frames: 2530189312. Throughput: 0: 10316.6. Samples: 382545018. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:08,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:09,317][626795] Updated weights for policy 0, policy_version 308862 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:11,021][626795] Updated weights for policy 0, policy_version 308872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:13,024][626795] Updated weights for policy 0, policy_version 308882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:13,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41096.6, 300 sec: 39793.7). Total num frames: 2530394112. Throughput: 0: 10295.9. Samples: 382575822. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:13,976][24592] Avg episode reward: [(0, '4.855')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:15,008][626795] Updated weights for policy 0, policy_version 308892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:16,961][626795] Updated weights for policy 0, policy_version 308902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:18,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41506.3, 300 sec: 39849.3). Total num frames: 2530607104. Throughput: 0: 10320.2. Samples: 382639494. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:18,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:18,980][626795] Updated weights for policy 0, policy_version 308912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:20,897][626795] Updated weights for policy 0, policy_version 308922 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:23,297][626795] Updated weights for policy 0, policy_version 308932 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:23,975][24592] Fps is (10 sec: 40140.4, 60 sec: 41096.5, 300 sec: 39877.1). Total num frames: 2530795520. Throughput: 0: 10202.3. Samples: 382697148. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:23,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:25,280][626795] Updated weights for policy 0, policy_version 308942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:27,210][626795] Updated weights for policy 0, policy_version 308952 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:28,976][24592] Fps is (10 sec: 39321.0, 60 sec: 40959.9, 300 sec: 39877.0). Total num frames: 2531000320. Throughput: 0: 10214.2. Samples: 382728396. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:28,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:29,414][626795] Updated weights for policy 0, policy_version 308962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:31,935][626795] Updated weights for policy 0, policy_version 308972 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:33,975][24592] Fps is (10 sec: 37683.4, 60 sec: 40413.9, 300 sec: 39765.9). Total num frames: 2531172352. Throughput: 0: 9990.0. Samples: 382781718. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:33,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:34,191][626795] Updated weights for policy 0, policy_version 308982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:36,238][626795] Updated weights for policy 0, policy_version 308992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:38,091][626795] Updated weights for policy 0, policy_version 309002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:38,975][24592] Fps is (10 sec: 37683.7, 60 sec: 40277.3, 300 sec: 39793.7). Total num frames: 2531377152. Throughput: 0: 10083.5. Samples: 382841622. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:38,977][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:40,131][626795] Updated weights for policy 0, policy_version 309012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:42,404][626795] Updated weights for policy 0, policy_version 309022 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:43,976][24592] Fps is (10 sec: 40140.4, 60 sec: 40140.7, 300 sec: 39738.1). Total num frames: 2531573760. Throughput: 0: 10044.9. Samples: 382870458. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:43,978][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:44,284][626795] Updated weights for policy 0, policy_version 309032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:46,258][626795] Updated weights for policy 0, policy_version 309042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:48,400][626795] Updated weights for policy 0, policy_version 309052 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:48,975][24592] Fps is (10 sec: 40141.0, 60 sec: 40413.9, 300 sec: 39793.7). Total num frames: 2531778560. Throughput: 0: 10023.4. Samples: 382932492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:48,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:50,401][626795] Updated weights for policy 0, policy_version 309062 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:52,626][626795] Updated weights for policy 0, policy_version 309072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:54,440][24592] Fps is (10 sec: 36793.6, 60 sec: 39832.6, 300 sec: 39730.4). Total num frames: 2531958784. Throughput: 0: 9771.0. Samples: 382989252. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:54,441][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:55,588][626795] Updated weights for policy 0, policy_version 309082 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:26:58,037][626795] Updated weights for policy 0, policy_version 309092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:58,976][24592] Fps is (10 sec: 32766.8, 60 sec: 38911.8, 300 sec: 39599.3). Total num frames: 2532106240. Throughput: 0: 9645.0. Samples: 383009850. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:26:58,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:00,286][626795] Updated weights for policy 0, policy_version 309102 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:02,481][626795] Updated weights for policy 0, policy_version 309112 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:03,977][24592] Fps is (10 sec: 36075.9, 60 sec: 38774.4, 300 sec: 39571.3). Total num frames: 2532302848. Throughput: 0: 9453.2. Samples: 383064906. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:03,980][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000309119_2532302848.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:04,219][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000307966_2522857472.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:04,652][626795] Updated weights for policy 0, policy_version 309122 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:06,786][626795] Updated weights for policy 0, policy_version 309132 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:08,735][626795] Updated weights for policy 0, policy_version 309142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:08,975][24592] Fps is (10 sec: 38503.4, 60 sec: 38365.8, 300 sec: 39516.0). Total num frames: 2532491264. Throughput: 0: 9437.1. Samples: 383121816. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:08,977][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:10,811][626795] Updated weights for policy 0, policy_version 309152 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:12,837][626795] Updated weights for policy 0, policy_version 309162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:13,975][24592] Fps is (10 sec: 37689.9, 60 sec: 38092.8, 300 sec: 39460.4). Total num frames: 2532679680. Throughput: 0: 9418.6. Samples: 383152230. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:13,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:15,169][626795] Updated weights for policy 0, policy_version 309172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:17,549][626795] Updated weights for policy 0, policy_version 309182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:18,975][24592] Fps is (10 sec: 37683.5, 60 sec: 37683.2, 300 sec: 39404.9). Total num frames: 2532868096. Throughput: 0: 9428.5. Samples: 383206002. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:18,977][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:19,596][626795] Updated weights for policy 0, policy_version 309192 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:21,625][626795] Updated weights for policy 0, policy_version 309202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:23,645][626795] Updated weights for policy 0, policy_version 309212 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:23,976][24592] Fps is (10 sec: 39317.9, 60 sec: 37955.7, 300 sec: 39404.8). Total num frames: 2533072896. Throughput: 0: 9450.5. Samples: 383266902. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:23,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:25,655][626795] Updated weights for policy 0, policy_version 309222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:28,298][626795] Updated weights for policy 0, policy_version 309232 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:28,976][24592] Fps is (10 sec: 38501.2, 60 sec: 37546.6, 300 sec: 39405.2). Total num frames: 2533253120. Throughput: 0: 9462.1. Samples: 383296254. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:28,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:30,348][626795] Updated weights for policy 0, policy_version 309242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:32,252][626795] Updated weights for policy 0, policy_version 309252 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:33,976][24592] Fps is (10 sec: 38504.6, 60 sec: 38092.6, 300 sec: 39404.9). Total num frames: 2533457920. Throughput: 0: 9341.8. Samples: 383352876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:33,978][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:34,236][626795] Updated weights for policy 0, policy_version 309262 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:36,203][626795] Updated weights for policy 0, policy_version 309272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:38,176][626795] Updated weights for policy 0, policy_version 309282 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:38,975][24592] Fps is (10 sec: 41780.8, 60 sec: 38229.4, 300 sec: 39460.5). Total num frames: 2533670912. Throughput: 0: 9569.5. Samples: 383415432. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:38,978][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:40,143][626795] Updated weights for policy 0, policy_version 309292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:42,019][626795] Updated weights for policy 0, policy_version 309302 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:43,975][24592] Fps is (10 sec: 41780.6, 60 sec: 38365.9, 300 sec: 39460.5). Total num frames: 2533875712. Throughput: 0: 9707.3. Samples: 383446674. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:43,976][24592] Avg episode reward: [(0, '5.050')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:44,024][626795] Updated weights for policy 0, policy_version 309312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:46,159][626795] Updated weights for policy 0, policy_version 309322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:48,263][626795] Updated weights for policy 0, policy_version 309332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:48,975][24592] Fps is (10 sec: 40140.3, 60 sec: 38229.3, 300 sec: 39432.7). Total num frames: 2534072320. Throughput: 0: 9811.4. Samples: 383506404. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:48,976][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:50,306][626795] Updated weights for policy 0, policy_version 309342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:52,427][626795] Updated weights for policy 0, policy_version 309352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:53,978][24592] Fps is (10 sec: 39312.8, 60 sec: 38801.4, 300 sec: 39404.6). Total num frames: 2534268928. Throughput: 0: 9871.1. Samples: 383566038. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:53,980][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:54,448][626795] Updated weights for policy 0, policy_version 309362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:56,616][626795] Updated weights for policy 0, policy_version 309372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:27:58,701][626795] Updated weights for policy 0, policy_version 309382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:58,975][24592] Fps is (10 sec: 39321.3, 60 sec: 39321.7, 300 sec: 39349.3). Total num frames: 2534465536. Throughput: 0: 9837.4. Samples: 383594916. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:27:58,976][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:01,363][626795] Updated weights for policy 0, policy_version 309392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:03,369][626795] Updated weights for policy 0, policy_version 309402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:03,977][24592] Fps is (10 sec: 36867.9, 60 sec: 38912.4, 300 sec: 39321.5). Total num frames: 2534637568. Throughput: 0: 9828.3. Samples: 383648286. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:03,979][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:05,572][626795] Updated weights for policy 0, policy_version 309412 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:07,456][626795] Updated weights for policy 0, policy_version 309422 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:08,976][24592] Fps is (10 sec: 37682.8, 60 sec: 39185.0, 300 sec: 39349.3). Total num frames: 2534842368. Throughput: 0: 9809.4. Samples: 383708316. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:08,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:09,678][626795] Updated weights for policy 0, policy_version 309432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:11,456][626795] Updated weights for policy 0, policy_version 309442 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,556][626772] Signal inference workers to stop experience collection... (5000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,556][626772] Signal inference workers to resume experience collection... (5000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,569][626795] InferenceWorker_p0-w0: stopping experience collection (5000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,571][626795] InferenceWorker_p0-w0: resuming experience collection (5000 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:13,604][626795] Updated weights for policy 0, policy_version 309452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,976][24592] Fps is (10 sec: 40144.5, 60 sec: 39321.4, 300 sec: 39293.8). Total num frames: 2535038976. Throughput: 0: 9830.5. Samples: 383738628. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:13,977][24592] Avg episode reward: [(0, '4.903')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:15,787][626795] Updated weights for policy 0, policy_version 309462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:17,719][626795] Updated weights for policy 0, policy_version 309472 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:18,976][24592] Fps is (10 sec: 40141.0, 60 sec: 39594.5, 300 sec: 39294.0). Total num frames: 2535243776. Throughput: 0: 9904.0. Samples: 383798556. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:18,978][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:19,831][626795] Updated weights for policy 0, policy_version 309482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:21,904][626795] Updated weights for policy 0, policy_version 309492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:23,947][626795] Updated weights for policy 0, policy_version 309502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:23,978][24592] Fps is (10 sec: 40130.3, 60 sec: 39456.8, 300 sec: 39321.2). Total num frames: 2535440384. Throughput: 0: 9843.5. Samples: 383858418. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:23,979][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:26,006][626795] Updated weights for policy 0, policy_version 309512 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:28,016][626795] Updated weights for policy 0, policy_version 309522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:28,975][24592] Fps is (10 sec: 39322.4, 60 sec: 39731.4, 300 sec: 39294.1). Total num frames: 2535636992. Throughput: 0: 9807.2. Samples: 383887998. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:28,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:29,979][626795] Updated weights for policy 0, policy_version 309532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:32,113][626795] Updated weights for policy 0, policy_version 309542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:33,975][24592] Fps is (10 sec: 38513.6, 60 sec: 39458.4, 300 sec: 39321.6). Total num frames: 2535825408. Throughput: 0: 9775.6. Samples: 383946306. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:33,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:34,647][626795] Updated weights for policy 0, policy_version 309552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:36,533][626795] Updated weights for policy 0, policy_version 309562 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:38,417][626795] Updated weights for policy 0, policy_version 309572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:38,975][24592] Fps is (10 sec: 39321.6, 60 sec: 39321.6, 300 sec: 39405.0). Total num frames: 2536030208. Throughput: 0: 9784.8. Samples: 384006330. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:38,976][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:40,455][626795] Updated weights for policy 0, policy_version 309582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:42,307][626795] Updated weights for policy 0, policy_version 309592 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:43,975][24592] Fps is (10 sec: 42598.3, 60 sec: 39594.7, 300 sec: 39543.7). Total num frames: 2536251392. Throughput: 0: 9848.0. Samples: 384038076. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:43,976][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:44,178][626795] Updated weights for policy 0, policy_version 309602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:46,152][626795] Updated weights for policy 0, policy_version 309612 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:48,155][626795] Updated weights for policy 0, policy_version 309622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:48,975][24592] Fps is (10 sec: 42598.4, 60 sec: 39731.2, 300 sec: 39710.4). Total num frames: 2536456192. Throughput: 0: 10073.9. Samples: 384101598. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:48,977][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:49,981][626795] Updated weights for policy 0, policy_version 309632 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:52,052][626795] Updated weights for policy 0, policy_version 309642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:53,975][24592] Fps is (10 sec: 40140.9, 60 sec: 39732.7, 300 sec: 39738.1). Total num frames: 2536652800. Throughput: 0: 10116.1. Samples: 384163536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:53,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:54,318][626795] Updated weights for policy 0, policy_version 309652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:56,456][626795] Updated weights for policy 0, policy_version 309662 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:28:58,319][626795] Updated weights for policy 0, policy_version 309672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:58,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39731.3, 300 sec: 39738.1). Total num frames: 2536849408. Throughput: 0: 10048.7. Samples: 384190818. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:28:58,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:00,434][626795] Updated weights for policy 0, policy_version 309682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:02,445][626795] Updated weights for policy 0, policy_version 309692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:03,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40278.1, 300 sec: 39710.6). Total num frames: 2537054208. Throughput: 0: 10095.6. Samples: 384252858. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:03,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:04,053][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000309700_2537062400.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:04,260][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000308535_2527518720.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:04,445][626795] Updated weights for policy 0, policy_version 309702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:06,920][626795] Updated weights for policy 0, policy_version 309712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:08,976][24592] Fps is (10 sec: 38499.7, 60 sec: 39867.4, 300 sec: 39710.7). Total num frames: 2537234432. Throughput: 0: 10006.5. Samples: 384308688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:08,978][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:08,989][626795] Updated weights for policy 0, policy_version 309722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:10,813][626795] Updated weights for policy 0, policy_version 309732 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:12,749][626795] Updated weights for policy 0, policy_version 309742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:13,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40141.0, 300 sec: 39765.9). Total num frames: 2537447424. Throughput: 0: 10048.4. Samples: 384340176. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:13,976][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:14,778][626795] Updated weights for policy 0, policy_version 309752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:16,704][626795] Updated weights for policy 0, policy_version 309762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:18,679][626795] Updated weights for policy 0, policy_version 309772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:18,975][24592] Fps is (10 sec: 42600.8, 60 sec: 40277.4, 300 sec: 39793.7). Total num frames: 2537660416. Throughput: 0: 10152.5. Samples: 384403170. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:18,976][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:20,611][626795] Updated weights for policy 0, policy_version 309782 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:22,587][626795] Updated weights for policy 0, policy_version 309792 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:23,975][24592] Fps is (10 sec: 42598.6, 60 sec: 40552.4, 300 sec: 39821.5). Total num frames: 2537873408. Throughput: 0: 10224.4. Samples: 384466428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:23,976][24592] Avg episode reward: [(0, '4.423')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:24,443][626795] Updated weights for policy 0, policy_version 309802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:26,375][626795] Updated weights for policy 0, policy_version 309812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:28,328][626795] Updated weights for policy 0, policy_version 309822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:28,975][24592] Fps is (10 sec: 42598.7, 60 sec: 40823.5, 300 sec: 39849.2). Total num frames: 2538086400. Throughput: 0: 10230.1. Samples: 384498432. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:28,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:30,283][626795] Updated weights for policy 0, policy_version 309832 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:32,224][626795] Updated weights for policy 0, policy_version 309842 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:33,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41233.1, 300 sec: 39877.0). Total num frames: 2538299392. Throughput: 0: 10232.7. Samples: 384562068. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:33,978][24592] Avg episode reward: [(0, '4.733')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:34,247][626795] Updated weights for policy 0, policy_version 309852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:36,112][626795] Updated weights for policy 0, policy_version 309862 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:38,526][626795] Updated weights for policy 0, policy_version 309872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:38,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40960.0, 300 sec: 39877.0). Total num frames: 2538487808. Throughput: 0: 10136.5. Samples: 384619680. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:38,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:40,512][626795] Updated weights for policy 0, policy_version 309882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:42,441][626795] Updated weights for policy 0, policy_version 309892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:43,975][24592] Fps is (10 sec: 40141.0, 60 sec: 40823.5, 300 sec: 39932.5). Total num frames: 2538700800. Throughput: 0: 10223.6. Samples: 384650880. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:43,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:44,452][626795] Updated weights for policy 0, policy_version 309902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:46,378][626795] Updated weights for policy 0, policy_version 309912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:48,225][626795] Updated weights for policy 0, policy_version 309922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40823.4, 300 sec: 39960.3). Total num frames: 2538905600. Throughput: 0: 10262.1. Samples: 384714654. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:48,976][24592] Avg episode reward: [(0, '4.242')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:50,218][626795] Updated weights for policy 0, policy_version 309932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:52,129][626795] Updated weights for policy 0, policy_version 309942 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.5, 300 sec: 39988.1). Total num frames: 2539118592. Throughput: 0: 10422.2. Samples: 384777678. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:53,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:54,194][626795] Updated weights for policy 0, policy_version 309952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:56,102][626795] Updated weights for policy 0, policy_version 309962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:57,981][626795] Updated weights for policy 0, policy_version 309972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:58,976][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.0, 300 sec: 40015.8). Total num frames: 2539323392. Throughput: 0: 10414.5. Samples: 384808830. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:29:58,979][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:29:59,852][626795] Updated weights for policy 0, policy_version 309982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:01,885][626795] Updated weights for policy 0, policy_version 309992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:03,841][626795] Updated weights for policy 0, policy_version 310002 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:03,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 40071.4). Total num frames: 2539536384. Throughput: 0: 10423.9. Samples: 384872244. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:03,976][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:05,791][626795] Updated weights for policy 0, policy_version 310012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:07,748][626795] Updated weights for policy 0, policy_version 310022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:08,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41916.2, 300 sec: 40071.4). Total num frames: 2539749376. Throughput: 0: 10429.1. Samples: 384935736. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:08,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:09,655][626795] Updated weights for policy 0, policy_version 310032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:12,199][626795] Updated weights for policy 0, policy_version 310042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:13,976][24592] Fps is (10 sec: 40139.6, 60 sec: 41506.0, 300 sec: 40071.4). Total num frames: 2539937792. Throughput: 0: 10287.4. Samples: 384961368. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:13,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:14,155][626795] Updated weights for policy 0, policy_version 310052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:16,063][626795] Updated weights for policy 0, policy_version 310062 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:18,029][626795] Updated weights for policy 0, policy_version 310072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:18,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41369.6, 300 sec: 40043.6). Total num frames: 2540142592. Throughput: 0: 10276.9. Samples: 385024530. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:18,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:19,950][626795] Updated weights for policy 0, policy_version 310082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:22,092][626795] Updated weights for policy 0, policy_version 310092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:23,876][626795] Updated weights for policy 0, policy_version 310102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:23,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41369.6, 300 sec: 40043.6). Total num frames: 2540355584. Throughput: 0: 10381.9. Samples: 385086864. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:23,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:25,867][626795] Updated weights for policy 0, policy_version 310112 (0.0034)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:27,861][626795] Updated weights for policy 0, policy_version 310122 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:28,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41369.6, 300 sec: 40071.4). Total num frames: 2540568576. Throughput: 0: 10389.8. Samples: 385118424. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:28,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:29,783][626795] Updated weights for policy 0, policy_version 310132 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:31,786][626795] Updated weights for policy 0, policy_version 310142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:33,704][626795] Updated weights for policy 0, policy_version 310152 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:33,977][24592] Fps is (10 sec: 41773.4, 60 sec: 41232.1, 300 sec: 40043.4). Total num frames: 2540773376. Throughput: 0: 10347.2. Samples: 385180290. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:33,978][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:35,721][626795] Updated weights for policy 0, policy_version 310162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:37,681][626795] Updated weights for policy 0, policy_version 310172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41506.1, 300 sec: 40043.6). Total num frames: 2540978176. Throughput: 0: 10349.2. Samples: 385243392. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:38,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:39,693][626795] Updated weights for policy 0, policy_version 310182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:41,473][626795] Updated weights for policy 0, policy_version 310192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:44,013][24592] Fps is (10 sec: 39178.2, 60 sec: 41070.5, 300 sec: 40038.5). Total num frames: 2541166592. Throughput: 0: 10348.8. Samples: 385274916. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:44,016][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:44,029][626795] Updated weights for policy 0, policy_version 310202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:46,030][626795] Updated weights for policy 0, policy_version 310212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:48,042][626795] Updated weights for policy 0, policy_version 310222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:48,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41233.1, 300 sec: 40099.2). Total num frames: 2541379584. Throughput: 0: 10214.4. Samples: 385331892. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:48,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:49,921][626795] Updated weights for policy 0, policy_version 310232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:51,961][626795] Updated weights for policy 0, policy_version 310242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:53,915][626795] Updated weights for policy 0, policy_version 310252 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:53,975][24592] Fps is (10 sec: 41938.4, 60 sec: 41096.5, 300 sec: 40043.6). Total num frames: 2541584384. Throughput: 0: 10172.3. Samples: 385393488. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:53,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:55,892][626795] Updated weights for policy 0, policy_version 310262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:57,829][626795] Updated weights for policy 0, policy_version 310272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41096.6, 300 sec: 40043.6). Total num frames: 2541789184. Throughput: 0: 10305.9. Samples: 385425132. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:30:58,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:30:59,849][626795] Updated weights for policy 0, policy_version 310282 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:01,751][626795] Updated weights for policy 0, policy_version 310292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:03,670][626795] Updated weights for policy 0, policy_version 310302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:03,976][24592] Fps is (10 sec: 41776.6, 60 sec: 41096.1, 300 sec: 40043.5). Total num frames: 2542002176. Throughput: 0: 10306.9. Samples: 385488348. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:03,986][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:03,993][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000310303_2542002176.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:04,165][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000309119_2532302848.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:05,708][626795] Updated weights for policy 0, policy_version 310312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:07,673][626795] Updated weights for policy 0, policy_version 310322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:08,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40960.0, 300 sec: 40043.6). Total num frames: 2542206976. Throughput: 0: 10296.4. Samples: 385550202. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:08,976][24592] Avg episode reward: [(0, '4.421')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:09,671][626795] Updated weights for policy 0, policy_version 310332 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:11,672][626795] Updated weights for policy 0, policy_version 310342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:13,502][626795] Updated weights for policy 0, policy_version 310352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:13,975][24592] Fps is (10 sec: 41781.9, 60 sec: 41369.8, 300 sec: 40043.6). Total num frames: 2542419968. Throughput: 0: 10298.3. Samples: 385581846. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:13,976][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:15,609][626795] Updated weights for policy 0, policy_version 310362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:18,015][626795] Updated weights for policy 0, policy_version 310372 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:18,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40960.0, 300 sec: 40015.8). Total num frames: 2542600192. Throughput: 0: 10184.2. Samples: 385638564. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:18,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:20,065][626795] Updated weights for policy 0, policy_version 310382 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:21,954][626795] Updated weights for policy 0, policy_version 310392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:23,943][626795] Updated weights for policy 0, policy_version 310402 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:23,975][24592] Fps is (10 sec: 39321.7, 60 sec: 40960.0, 300 sec: 40043.6). Total num frames: 2542813184. Throughput: 0: 10175.6. Samples: 385701294. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:23,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:25,843][626795] Updated weights for policy 0, policy_version 310412 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:27,738][626795] Updated weights for policy 0, policy_version 310422 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:28,976][24592] Fps is (10 sec: 41776.4, 60 sec: 40823.0, 300 sec: 40154.6). Total num frames: 2543017984. Throughput: 0: 10185.8. Samples: 385732896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:28,982][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:29,788][626795] Updated weights for policy 0, policy_version 310432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:31,679][626795] Updated weights for policy 0, policy_version 310442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:33,625][626795] Updated weights for policy 0, policy_version 310452 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:33,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40960.9, 300 sec: 40182.5). Total num frames: 2543230976. Throughput: 0: 10320.0. Samples: 385796292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:33,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:35,526][626795] Updated weights for policy 0, policy_version 310462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:37,464][626795] Updated weights for policy 0, policy_version 310472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:38,976][24592] Fps is (10 sec: 42600.5, 60 sec: 41096.4, 300 sec: 40238.0). Total num frames: 2543443968. Throughput: 0: 10356.9. Samples: 385859550. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:38,980][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:39,549][626795] Updated weights for policy 0, policy_version 310482 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:41,455][626795] Updated weights for policy 0, policy_version 310492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:43,274][626795] Updated weights for policy 0, policy_version 310502 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:43,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41532.4, 300 sec: 40265.8). Total num frames: 2543656960. Throughput: 0: 10340.4. Samples: 385890450. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:43,976][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:45,329][626795] Updated weights for policy 0, policy_version 310512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:47,190][626795] Updated weights for policy 0, policy_version 310522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:48,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41369.6, 300 sec: 40412.7). Total num frames: 2543861760. Throughput: 0: 10334.7. Samples: 385953402. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:48,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:49,913][626795] Updated weights for policy 0, policy_version 310532 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:51,745][626795] Updated weights for policy 0, policy_version 310542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:53,666][626795] Updated weights for policy 0, policy_version 310552 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:53,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41096.5, 300 sec: 40488.0). Total num frames: 2544050176. Throughput: 0: 10234.6. Samples: 386010762. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:53,976][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:55,679][626795] Updated weights for policy 0, policy_version 310562 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:57,649][626795] Updated weights for policy 0, policy_version 310572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:58,976][24592] Fps is (10 sec: 40140.1, 60 sec: 41232.9, 300 sec: 40543.7). Total num frames: 2544263168. Throughput: 0: 10223.2. Samples: 386041890. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:31:58,976][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:31:59,593][626795] Updated weights for policy 0, policy_version 310582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:01,529][626795] Updated weights for policy 0, policy_version 310592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:03,400][626795] Updated weights for policy 0, policy_version 310602 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.9, 300 sec: 40599.0). Total num frames: 2544467968. Throughput: 0: 10368.0. Samples: 386105124. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:03,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:05,409][626795] Updated weights for policy 0, policy_version 310612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:07,414][626795] Updated weights for policy 0, policy_version 310622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:08,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41233.1, 300 sec: 40682.3). Total num frames: 2544680960. Throughput: 0: 10366.7. Samples: 386167794. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:08,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:09,389][626795] Updated weights for policy 0, policy_version 310632 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:11,290][626795] Updated weights for policy 0, policy_version 310642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:13,204][626795] Updated weights for policy 0, policy_version 310652 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:13,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41233.1, 300 sec: 40765.6). Total num frames: 2544893952. Throughput: 0: 10356.6. Samples: 386198934. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:13,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:15,285][626795] Updated weights for policy 0, policy_version 310662 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:17,183][626795] Updated weights for policy 0, policy_version 310672 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:18,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.7, 300 sec: 40765.7). Total num frames: 2545098752. Throughput: 0: 10336.8. Samples: 386261448. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:18,976][24592] Avg episode reward: [(0, '4.446')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:19,260][626795] Updated weights for policy 0, policy_version 310682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:21,067][626795] Updated weights for policy 0, policy_version 310692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:23,592][626795] Updated weights for policy 0, policy_version 310702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:23,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41096.5, 300 sec: 40765.7). Total num frames: 2545278976. Throughput: 0: 10197.0. Samples: 386318412. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:23,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:25,597][626795] Updated weights for policy 0, policy_version 310712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:27,585][626795] Updated weights for policy 0, policy_version 310722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:28,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41233.5, 300 sec: 40793.4). Total num frames: 2545491968. Throughput: 0: 10203.5. Samples: 386349606. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:28,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:29,534][626795] Updated weights for policy 0, policy_version 310732 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:31,506][626795] Updated weights for policy 0, policy_version 310742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:33,336][626795] Updated weights for policy 0, policy_version 310752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41096.6, 300 sec: 40765.6). Total num frames: 2545696768. Throughput: 0: 10213.7. Samples: 386413020. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:33,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:35,531][626795] Updated weights for policy 0, policy_version 310762 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:37,393][626795] Updated weights for policy 0, policy_version 310772 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:38,976][24592] Fps is (10 sec: 41776.2, 60 sec: 41096.2, 300 sec: 40793.3). Total num frames: 2545909760. Throughput: 0: 10318.0. Samples: 386475078. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:38,978][24592] Avg episode reward: [(0, '4.920')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:39,483][626795] Updated weights for policy 0, policy_version 310782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:41,261][626795] Updated weights for policy 0, policy_version 310792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:43,302][626795] Updated weights for policy 0, policy_version 310802 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.1, 300 sec: 40821.2). Total num frames: 2546114560. Throughput: 0: 10310.1. Samples: 386505840. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:43,976][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:45,202][626795] Updated weights for policy 0, policy_version 310812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:47,223][626795] Updated weights for policy 0, policy_version 310822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:48,975][24592] Fps is (10 sec: 40963.2, 60 sec: 40960.0, 300 sec: 40849.2). Total num frames: 2546319360. Throughput: 0: 10304.4. Samples: 386568822. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:48,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:49,192][626795] Updated weights for policy 0, policy_version 310832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:51,238][626795] Updated weights for policy 0, policy_version 310842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:53,112][626795] Updated weights for policy 0, policy_version 310852 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:53,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41369.7, 300 sec: 40904.5). Total num frames: 2546532352. Throughput: 0: 10287.7. Samples: 386630742. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:53,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:55,668][626795] Updated weights for policy 0, policy_version 310862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:57,602][626795] Updated weights for policy 0, policy_version 310872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:58,975][24592] Fps is (10 sec: 39321.3, 60 sec: 40823.6, 300 sec: 40932.4). Total num frames: 2546712576. Throughput: 0: 10159.5. Samples: 386656110. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:32:58,977][24592] Avg episode reward: [(0, '4.358')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:32:59,626][626795] Updated weights for policy 0, policy_version 310882 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:01,614][626795] Updated weights for policy 0, policy_version 310892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:03,466][626795] Updated weights for policy 0, policy_version 310902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:03,975][24592] Fps is (10 sec: 39321.5, 60 sec: 40960.0, 300 sec: 40960.0). Total num frames: 2546925568. Throughput: 0: 10169.7. Samples: 386719086. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:03,977][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000310904_2546925568.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:04,119][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000309700_2537062400.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:05,494][626795] Updated weights for policy 0, policy_version 310912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:07,505][626795] Updated weights for policy 0, policy_version 310922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40823.5, 300 sec: 40987.8). Total num frames: 2547130368. Throughput: 0: 10299.5. Samples: 386781888. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:08,977][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:09,363][626795] Updated weights for policy 0, policy_version 310932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:11,361][626795] Updated weights for policy 0, policy_version 310942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:13,274][626795] Updated weights for policy 0, policy_version 310952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40823.4, 300 sec: 41015.6). Total num frames: 2547343360. Throughput: 0: 10295.6. Samples: 386812908. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:13,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:15,287][626795] Updated weights for policy 0, policy_version 310962 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:17,195][626795] Updated weights for policy 0, policy_version 310972 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:18,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40823.5, 300 sec: 41043.7). Total num frames: 2547548160. Throughput: 0: 10276.8. Samples: 386875476. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:18,976][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:19,328][626795] Updated weights for policy 0, policy_version 310982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:21,215][626795] Updated weights for policy 0, policy_version 310992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:23,028][626795] Updated weights for policy 0, policy_version 311002 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41369.6, 300 sec: 41098.8). Total num frames: 2547761152. Throughput: 0: 10297.9. Samples: 386938476. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:23,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:25,077][626795] Updated weights for policy 0, policy_version 311012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:27,154][626795] Updated weights for policy 0, policy_version 311022 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:28,975][24592] Fps is (10 sec: 40141.0, 60 sec: 40960.0, 300 sec: 41098.9). Total num frames: 2547949568. Throughput: 0: 10297.3. Samples: 386969220. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:28,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:29,608][626795] Updated weights for policy 0, policy_version 311032 (0.0556)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:31,466][626795] Updated weights for policy 0, policy_version 311042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:33,392][626795] Updated weights for policy 0, policy_version 311052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:33,975][24592] Fps is (10 sec: 39321.8, 60 sec: 40960.0, 300 sec: 41098.9). Total num frames: 2548154368. Throughput: 0: 10183.7. Samples: 387027090. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:33,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:35,348][626795] Updated weights for policy 0, policy_version 311062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:37,348][626795] Updated weights for policy 0, policy_version 311072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:38,975][24592] Fps is (10 sec: 41778.7, 60 sec: 40960.4, 300 sec: 41071.1). Total num frames: 2548367360. Throughput: 0: 10213.1. Samples: 387090330. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:38,977][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:39,312][626795] Updated weights for policy 0, policy_version 311082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:41,259][626795] Updated weights for policy 0, policy_version 311092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:43,133][626795] Updated weights for policy 0, policy_version 311102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:43,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41096.5, 300 sec: 41098.8). Total num frames: 2548580352. Throughput: 0: 10333.1. Samples: 387121098. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:43,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:45,256][626795] Updated weights for policy 0, policy_version 311112 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:46,983][626795] Updated weights for policy 0, policy_version 311122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.5, 300 sec: 41126.6). Total num frames: 2548785152. Throughput: 0: 10333.6. Samples: 387184098. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:48,980][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:49,005][626795] Updated weights for policy 0, policy_version 311132 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:50,995][626795] Updated weights for policy 0, policy_version 311142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:53,006][626795] Updated weights for policy 0, policy_version 311152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:53,976][24592] Fps is (10 sec: 40959.4, 60 sec: 40959.9, 300 sec: 41154.4). Total num frames: 2548989952. Throughput: 0: 10314.4. Samples: 387246036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:53,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:54,969][626795] Updated weights for policy 0, policy_version 311162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:56,919][626795] Updated weights for policy 0, policy_version 311172 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:33:58,879][626795] Updated weights for policy 0, policy_version 311182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.2, 300 sec: 41182.2). Total num frames: 2549202944. Throughput: 0: 10331.2. Samples: 387277812. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:33:58,976][24592] Avg episode reward: [(0, '5.009')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:01,272][626795] Updated weights for policy 0, policy_version 311192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:03,270][626795] Updated weights for policy 0, policy_version 311202 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:03,975][24592] Fps is (10 sec: 40141.6, 60 sec: 41096.6, 300 sec: 41210.0). Total num frames: 2549391360. Throughput: 0: 10234.5. Samples: 387336030. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:03,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:05,251][626795] Updated weights for policy 0, policy_version 311212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:07,050][626795] Updated weights for policy 0, policy_version 311222 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:08,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2549604352. Throughput: 0: 10254.1. Samples: 387399912. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:08,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:09,064][626795] Updated weights for policy 0, policy_version 311232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:10,916][626795] Updated weights for policy 0, policy_version 311242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:12,875][626795] Updated weights for policy 0, policy_version 311252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:13,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41233.0, 300 sec: 41209.9). Total num frames: 2549817344. Throughput: 0: 10276.8. Samples: 387431676. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:13,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:14,799][626795] Updated weights for policy 0, policy_version 311262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:16,775][626795] Updated weights for policy 0, policy_version 311272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:18,634][626795] Updated weights for policy 0, policy_version 311282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:18,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2550030336. Throughput: 0: 10404.0. Samples: 387495270. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:18,979][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:20,645][626795] Updated weights for policy 0, policy_version 311292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:22,586][626795] Updated weights for policy 0, policy_version 311302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2550243328. Throughput: 0: 10410.8. Samples: 387558816. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:23,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:24,463][626795] Updated weights for policy 0, policy_version 311312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:26,443][626795] Updated weights for policy 0, policy_version 311322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:28,252][626795] Updated weights for policy 0, policy_version 311332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.2, 300 sec: 41209.9). Total num frames: 2550456320. Throughput: 0: 10445.6. Samples: 387591150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:28,978][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:30,264][626795] Updated weights for policy 0, policy_version 311342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:32,118][626795] Updated weights for policy 0, policy_version 311352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:33,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41506.1, 300 sec: 41209.9). Total num frames: 2550644736. Throughput: 0: 10472.5. Samples: 387655362. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:33,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:34,721][626795] Updated weights for policy 0, policy_version 311362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:36,556][626795] Updated weights for policy 0, policy_version 311372 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:38,456][626795] Updated weights for policy 0, policy_version 311382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:38,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41506.2, 300 sec: 41209.9). Total num frames: 2550857728. Throughput: 0: 10386.0. Samples: 387713406. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:38,976][24592] Avg episode reward: [(0, '5.032')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:40,372][626795] Updated weights for policy 0, policy_version 311392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:42,335][626795] Updated weights for policy 0, policy_version 311402 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:43,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.1, 300 sec: 41237.7). Total num frames: 2551070720. Throughput: 0: 10370.0. Samples: 387744462. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:43,976][24592] Avg episode reward: [(0, '4.390')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:44,204][626795] Updated weights for policy 0, policy_version 311412 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:46,151][626795] Updated weights for policy 0, policy_version 311422 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:48,018][626795] Updated weights for policy 0, policy_version 311432 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:48,975][24592] Fps is (10 sec: 43417.5, 60 sec: 41779.2, 300 sec: 41265.5). Total num frames: 2551291904. Throughput: 0: 10509.2. Samples: 387808944. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:48,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:50,006][626795] Updated weights for policy 0, policy_version 311442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:51,941][626795] Updated weights for policy 0, policy_version 311452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:53,863][626795] Updated weights for policy 0, policy_version 311462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:53,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41779.3, 300 sec: 41265.5). Total num frames: 2551496704. Throughput: 0: 10498.6. Samples: 387872352. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:53,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:55,873][626795] Updated weights for policy 0, policy_version 311472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:57,715][626795] Updated weights for policy 0, policy_version 311482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41642.7, 300 sec: 41237.7). Total num frames: 2551701504. Throughput: 0: 10495.1. Samples: 387903954. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:34:58,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:34:59,842][626795] Updated weights for policy 0, policy_version 311492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:01,696][626795] Updated weights for policy 0, policy_version 311502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:03,657][626795] Updated weights for policy 0, policy_version 311512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 42052.2, 300 sec: 41237.7). Total num frames: 2551914496. Throughput: 0: 10473.2. Samples: 387966564. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:03,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:03,986][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000311514_2551922688.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:04,117][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000310303_2542002176.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:05,526][626795] Updated weights for policy 0, policy_version 311522 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:08,069][626795] Updated weights for policy 0, policy_version 311532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:08,975][24592] Fps is (10 sec: 40140.8, 60 sec: 41642.6, 300 sec: 41237.7). Total num frames: 2552102912. Throughput: 0: 10331.6. Samples: 388023738. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:08,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:09,982][626795] Updated weights for policy 0, policy_version 311542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:11,907][626795] Updated weights for policy 0, policy_version 311552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:13,865][626795] Updated weights for policy 0, policy_version 311562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 41779.3, 300 sec: 41293.3). Total num frames: 2552324096. Throughput: 0: 10326.9. Samples: 388055862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:13,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:15,867][626795] Updated weights for policy 0, policy_version 311572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:17,647][626795] Updated weights for policy 0, policy_version 311582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:18,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41642.7, 300 sec: 41265.5). Total num frames: 2552528896. Throughput: 0: 10332.4. Samples: 388120320. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:18,976][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:19,645][626795] Updated weights for policy 0, policy_version 311592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:21,526][626795] Updated weights for policy 0, policy_version 311602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:23,526][626795] Updated weights for policy 0, policy_version 311612 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.7, 300 sec: 41265.5). Total num frames: 2552741888. Throughput: 0: 10458.5. Samples: 388184040. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:23,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:25,438][626795] Updated weights for policy 0, policy_version 311622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:27,371][626795] Updated weights for policy 0, policy_version 311632 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:28,976][24592] Fps is (10 sec: 42598.0, 60 sec: 41642.6, 300 sec: 41293.4). Total num frames: 2552954880. Throughput: 0: 10471.2. Samples: 388215666. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:28,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:29,279][626795] Updated weights for policy 0, policy_version 311642 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:31,188][626795] Updated weights for policy 0, policy_version 311652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:33,121][626795] Updated weights for policy 0, policy_version 311662 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:33,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.2, 300 sec: 41321.0). Total num frames: 2553167872. Throughput: 0: 10473.5. Samples: 388280250. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:33,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:34,936][626795] Updated weights for policy 0, policy_version 311672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:36,882][626795] Updated weights for policy 0, policy_version 311682 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:39,180][24592] Fps is (10 sec: 40140.9, 60 sec: 41637.5, 300 sec: 41325.5). Total num frames: 2553364480. Throughput: 0: 10430.8. Samples: 388343868. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:39,181][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:39,455][626795] Updated weights for policy 0, policy_version 311692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:41,420][626795] Updated weights for policy 0, policy_version 311702 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:43,251][626795] Updated weights for policy 0, policy_version 311712 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:43,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41642.6, 300 sec: 41321.0). Total num frames: 2553569280. Throughput: 0: 10336.5. Samples: 388369098. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:43,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:45,280][626795] Updated weights for policy 0, policy_version 311722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:47,175][626795] Updated weights for policy 0, policy_version 311732 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:48,976][24592] Fps is (10 sec: 41811.7, 60 sec: 41369.3, 300 sec: 41320.9). Total num frames: 2553774080. Throughput: 0: 10350.3. Samples: 388432332. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:48,978][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:49,290][626795] Updated weights for policy 0, policy_version 311742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:51,249][626795] Updated weights for policy 0, policy_version 311752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:53,127][626795] Updated weights for policy 0, policy_version 311762 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:53,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2553987072. Throughput: 0: 10462.8. Samples: 388494564. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:53,976][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:55,066][626795] Updated weights for policy 0, policy_version 311772 (0.0040)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:57,093][626795] Updated weights for policy 0, policy_version 311782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:58,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41506.1, 300 sec: 41321.1). Total num frames: 2554191872. Throughput: 0: 10448.1. Samples: 388526028. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:35:58,976][24592] Avg episode reward: [(0, '4.548')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:35:59,001][626795] Updated weights for policy 0, policy_version 311792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:00,950][626795] Updated weights for policy 0, policy_version 311802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:02,815][626795] Updated weights for policy 0, policy_version 311812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41506.1, 300 sec: 41348.8). Total num frames: 2554404864. Throughput: 0: 10425.7. Samples: 388589478. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:03,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:04,881][626795] Updated weights for policy 0, policy_version 311822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:06,730][626795] Updated weights for policy 0, policy_version 311832 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:08,652][626795] Updated weights for policy 0, policy_version 311842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:08,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.7, 300 sec: 41348.8). Total num frames: 2554617856. Throughput: 0: 10426.1. Samples: 388653216. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:08,977][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:10,639][626795] Updated weights for policy 0, policy_version 311852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:13,023][626795] Updated weights for policy 0, policy_version 311862 (0.0560)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41506.1, 300 sec: 41404.3). Total num frames: 2554814464. Throughput: 0: 10377.4. Samples: 388682646. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:13,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:15,056][626795] Updated weights for policy 0, policy_version 311872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:16,879][626795] Updated weights for policy 0, policy_version 311882 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:18,749][626795] Updated weights for policy 0, policy_version 311892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:18,975][24592] Fps is (10 sec: 40141.3, 60 sec: 41506.2, 300 sec: 41376.5). Total num frames: 2555019264. Throughput: 0: 10284.0. Samples: 388743030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:18,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:20,883][626795] Updated weights for policy 0, policy_version 311902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:22,773][626795] Updated weights for policy 0, policy_version 311912 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:23,976][24592] Fps is (10 sec: 41775.3, 60 sec: 41505.5, 300 sec: 41404.3). Total num frames: 2555232256. Throughput: 0: 10317.8. Samples: 388806072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:23,978][24592] Avg episode reward: [(0, '4.252')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:24,787][626795] Updated weights for policy 0, policy_version 311922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:26,631][626795] Updated weights for policy 0, policy_version 311932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:27,177][626772] Signal inference workers to stop experience collection... (5050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:27,177][626772] Signal inference workers to resume experience collection... (5050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:27,184][626795] InferenceWorker_p0-w0: stopping experience collection (5050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:27,190][626795] InferenceWorker_p0-w0: resuming experience collection (5050 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:28,649][626795] Updated weights for policy 0, policy_version 311942 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:28,976][24592] Fps is (10 sec: 41776.7, 60 sec: 41369.3, 300 sec: 41376.5). Total num frames: 2555437056. Throughput: 0: 10413.9. Samples: 388837728. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:28,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:30,651][626795] Updated weights for policy 0, policy_version 311952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:32,533][626795] Updated weights for policy 0, policy_version 311962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:33,976][24592] Fps is (10 sec: 41782.1, 60 sec: 41369.5, 300 sec: 41376.5). Total num frames: 2555650048. Throughput: 0: 10407.8. Samples: 388900680. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:33,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:34,411][626795] Updated weights for policy 0, policy_version 311972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:36,434][626795] Updated weights for policy 0, policy_version 311982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:38,309][626795] Updated weights for policy 0, policy_version 311992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:38,980][24592] Fps is (10 sec: 42580.5, 60 sec: 41781.5, 300 sec: 41375.9). Total num frames: 2555863040. Throughput: 0: 10432.9. Samples: 388964094. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:38,982][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:40,343][626795] Updated weights for policy 0, policy_version 312002 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:42,144][626795] Updated weights for policy 0, policy_version 312012 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:43,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41779.2, 300 sec: 41404.3). Total num frames: 2556076032. Throughput: 0: 10433.9. Samples: 388995552. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:43,976][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:44,179][626795] Updated weights for policy 0, policy_version 312022 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:46,637][626795] Updated weights for policy 0, policy_version 312032 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:48,657][626795] Updated weights for policy 0, policy_version 312042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:48,975][24592] Fps is (10 sec: 40160.0, 60 sec: 41506.5, 300 sec: 41404.3). Total num frames: 2556264448. Throughput: 0: 10311.6. Samples: 389053500. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:48,978][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:50,601][626795] Updated weights for policy 0, policy_version 312052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:52,357][626795] Updated weights for policy 0, policy_version 312062 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:53,975][24592] Fps is (10 sec: 39321.4, 60 sec: 41369.6, 300 sec: 41376.6). Total num frames: 2556469248. Throughput: 0: 10303.7. Samples: 389116884. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:53,978][24592] Avg episode reward: [(0, '4.406')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:54,345][626795] Updated weights for policy 0, policy_version 312072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:56,337][626795] Updated weights for policy 0, policy_version 312082 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:36:58,278][626795] Updated weights for policy 0, policy_version 312092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.2, 300 sec: 41404.3). Total num frames: 2556682240. Throughput: 0: 10346.0. Samples: 389148216. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:36:58,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:00,094][626795] Updated weights for policy 0, policy_version 312102 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:02,082][626795] Updated weights for policy 0, policy_version 312112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:03,976][24592] Fps is (10 sec: 42597.0, 60 sec: 41505.9, 300 sec: 41404.3). Total num frames: 2556895232. Throughput: 0: 10427.9. Samples: 389212290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:03,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:04,032][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000312122_2556903424.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:04,034][626795] Updated weights for policy 0, policy_version 312122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:04,193][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000310904_2546925568.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:06,086][626795] Updated weights for policy 0, policy_version 312132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:07,814][626795] Updated weights for policy 0, policy_version 312142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:08,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41369.6, 300 sec: 41376.5). Total num frames: 2557100032. Throughput: 0: 10418.6. Samples: 389274900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:08,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:09,912][626795] Updated weights for policy 0, policy_version 312152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:11,825][626795] Updated weights for policy 0, policy_version 312162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:13,710][626795] Updated weights for policy 0, policy_version 312172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:13,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41779.2, 300 sec: 41432.1). Total num frames: 2557321216. Throughput: 0: 10422.8. Samples: 389306748. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:13,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:15,637][626795] Updated weights for policy 0, policy_version 312182 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:18,176][626795] Updated weights for policy 0, policy_version 312192 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41369.5, 300 sec: 41432.1). Total num frames: 2557501440. Throughput: 0: 10295.4. Samples: 389363970. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:18,976][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:20,225][626795] Updated weights for policy 0, policy_version 312202 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:22,153][626795] Updated weights for policy 0, policy_version 312212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:23,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41370.2, 300 sec: 41432.1). Total num frames: 2557714432. Throughput: 0: 10298.4. Samples: 389427474. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:23,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:24,058][626795] Updated weights for policy 0, policy_version 312222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:26,097][626795] Updated weights for policy 0, policy_version 312232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:27,873][626795] Updated weights for policy 0, policy_version 312242 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:28,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41506.5, 300 sec: 41459.9). Total num frames: 2557927424. Throughput: 0: 10283.6. Samples: 389458314. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:28,976][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:29,989][626795] Updated weights for policy 0, policy_version 312252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:31,895][626795] Updated weights for policy 0, policy_version 312262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:33,840][626795] Updated weights for policy 0, policy_version 312272 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.8, 300 sec: 41432.2). Total num frames: 2558132224. Throughput: 0: 10379.6. Samples: 389520582. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:33,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:35,837][626795] Updated weights for policy 0, policy_version 312282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:37,789][626795] Updated weights for policy 0, policy_version 312292 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41372.9, 300 sec: 41459.8). Total num frames: 2558345216. Throughput: 0: 10390.1. Samples: 389584440. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:38,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:39,702][626795] Updated weights for policy 0, policy_version 312302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:41,691][626795] Updated weights for policy 0, policy_version 312312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:43,422][626795] Updated weights for policy 0, policy_version 312322 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:43,976][24592] Fps is (10 sec: 41778.7, 60 sec: 41233.0, 300 sec: 41459.8). Total num frames: 2558550016. Throughput: 0: 10396.6. Samples: 389616066. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:43,978][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:45,479][626795] Updated weights for policy 0, policy_version 312332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:47,562][626795] Updated weights for policy 0, policy_version 312342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:48,976][24592] Fps is (10 sec: 41775.5, 60 sec: 41642.0, 300 sec: 41459.7). Total num frames: 2558763008. Throughput: 0: 10358.8. Samples: 389678442. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:48,978][24592] Avg episode reward: [(0, '4.564')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:49,555][626795] Updated weights for policy 0, policy_version 312352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:51,951][626795] Updated weights for policy 0, policy_version 312362 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:53,945][626795] Updated weights for policy 0, policy_version 312372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:53,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2558951424. Throughput: 0: 10248.7. Samples: 389736090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:53,976][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:55,790][626795] Updated weights for policy 0, policy_version 312382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:57,783][626795] Updated weights for policy 0, policy_version 312392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:58,975][24592] Fps is (10 sec: 40144.5, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2559164416. Throughput: 0: 10240.8. Samples: 389767584. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:37:58,979][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:37:59,617][626795] Updated weights for policy 0, policy_version 312402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:01,640][626795] Updated weights for policy 0, policy_version 312412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:03,618][626795] Updated weights for policy 0, policy_version 312422 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:03,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41369.7, 300 sec: 41515.4). Total num frames: 2559377408. Throughput: 0: 10387.4. Samples: 389831406. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:03,976][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:05,509][626795] Updated weights for policy 0, policy_version 312432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:07,387][626795] Updated weights for policy 0, policy_version 312442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:08,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41506.2, 300 sec: 41515.4). Total num frames: 2559590400. Throughput: 0: 10383.2. Samples: 389894718. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:08,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:09,476][626795] Updated weights for policy 0, policy_version 312452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:11,374][626795] Updated weights for policy 0, policy_version 312462 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:13,242][626795] Updated weights for policy 0, policy_version 312472 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:13,975][24592] Fps is (10 sec: 41780.1, 60 sec: 41233.1, 300 sec: 41515.4). Total num frames: 2559795200. Throughput: 0: 10409.1. Samples: 389926722. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:13,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:15,236][626795] Updated weights for policy 0, policy_version 312482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:17,153][626795] Updated weights for policy 0, policy_version 312492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:18,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41779.0, 300 sec: 41515.3). Total num frames: 2560008192. Throughput: 0: 10418.4. Samples: 389989416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:18,980][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:19,089][626795] Updated weights for policy 0, policy_version 312502 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:21,041][626795] Updated weights for policy 0, policy_version 312512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:23,557][626795] Updated weights for policy 0, policy_version 312522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:23,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41233.1, 300 sec: 41487.6). Total num frames: 2560188416. Throughput: 0: 10277.5. Samples: 390046926. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:23,978][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:25,477][626795] Updated weights for policy 0, policy_version 312532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:27,462][626795] Updated weights for policy 0, policy_version 312542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:28,976][24592] Fps is (10 sec: 39321.2, 60 sec: 41232.7, 300 sec: 41515.3). Total num frames: 2560401408. Throughput: 0: 10273.4. Samples: 390078372. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:28,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:29,332][626795] Updated weights for policy 0, policy_version 312552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:31,282][626795] Updated weights for policy 0, policy_version 312562 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:33,235][626795] Updated weights for policy 0, policy_version 312572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:33,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41369.6, 300 sec: 41515.4). Total num frames: 2560614400. Throughput: 0: 10303.1. Samples: 390142074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:33,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:35,114][626795] Updated weights for policy 0, policy_version 312582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:37,095][626795] Updated weights for policy 0, policy_version 312592 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:38,975][24592] Fps is (10 sec: 42600.4, 60 sec: 41369.6, 300 sec: 41515.4). Total num frames: 2560827392. Throughput: 0: 10439.1. Samples: 390205848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:38,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:39,111][626795] Updated weights for policy 0, policy_version 312602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:41,075][626795] Updated weights for policy 0, policy_version 312612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:42,809][626795] Updated weights for policy 0, policy_version 312622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:43,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41506.2, 300 sec: 41543.2). Total num frames: 2561040384. Throughput: 0: 10430.7. Samples: 390236964. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:43,976][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:44,874][626795] Updated weights for policy 0, policy_version 312632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:46,850][626795] Updated weights for policy 0, policy_version 312642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:48,632][626795] Updated weights for policy 0, policy_version 312652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:48,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41506.6, 300 sec: 41570.9). Total num frames: 2561253376. Throughput: 0: 10434.7. Samples: 390300966. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:48,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:50,603][626795] Updated weights for policy 0, policy_version 312662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:52,577][626795] Updated weights for policy 0, policy_version 312672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:53,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41915.8, 300 sec: 41570.9). Total num frames: 2561466368. Throughput: 0: 10447.3. Samples: 390364848. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:53,976][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:54,507][626795] Updated weights for policy 0, policy_version 312682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:57,068][626795] Updated weights for policy 0, policy_version 312692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:38:58,910][626795] Updated weights for policy 0, policy_version 312702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:58,975][24592] Fps is (10 sec: 40141.5, 60 sec: 41506.1, 300 sec: 41570.9). Total num frames: 2561654784. Throughput: 0: 10295.3. Samples: 390390012. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:38:58,976][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:00,904][626795] Updated weights for policy 0, policy_version 312712 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:02,846][626795] Updated weights for policy 0, policy_version 312722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:03,975][24592] Fps is (10 sec: 39321.4, 60 sec: 41369.7, 300 sec: 41543.1). Total num frames: 2561859584. Throughput: 0: 10307.8. Samples: 390453264. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:03,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000312727_2561859584.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:04,105][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000311514_2551922688.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:04,959][626795] Updated weights for policy 0, policy_version 312732 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:06,846][626795] Updated weights for policy 0, policy_version 312742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:08,733][626795] Updated weights for policy 0, policy_version 312752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41369.6, 300 sec: 41543.2). Total num frames: 2562072576. Throughput: 0: 10417.9. Samples: 390515730. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:08,976][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:10,669][626795] Updated weights for policy 0, policy_version 312762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:12,635][626795] Updated weights for policy 0, policy_version 312772 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:13,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41506.2, 300 sec: 41543.2). Total num frames: 2562285568. Throughput: 0: 10417.7. Samples: 390547164. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:13,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:14,675][626795] Updated weights for policy 0, policy_version 312782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:16,538][626795] Updated weights for policy 0, policy_version 312792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:18,382][626795] Updated weights for policy 0, policy_version 312802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:18,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41369.8, 300 sec: 41515.4). Total num frames: 2562490368. Throughput: 0: 10417.6. Samples: 390610866. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:18,977][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:20,457][626795] Updated weights for policy 0, policy_version 312812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:22,289][626795] Updated weights for policy 0, policy_version 312822 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41515.4). Total num frames: 2562703360. Throughput: 0: 10426.1. Samples: 390675024. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:23,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:24,229][626795] Updated weights for policy 0, policy_version 312832 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:26,098][626795] Updated weights for policy 0, policy_version 312842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:28,158][626795] Updated weights for policy 0, policy_version 312852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:28,976][24592] Fps is (10 sec: 40139.2, 60 sec: 41506.1, 300 sec: 41515.3). Total num frames: 2562891776. Throughput: 0: 10434.4. Samples: 390706518. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:28,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:30,560][626795] Updated weights for policy 0, policy_version 312862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:32,469][626795] Updated weights for policy 0, policy_version 312872 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:33,977][24592] Fps is (10 sec: 40135.8, 60 sec: 41505.3, 300 sec: 41515.2). Total num frames: 2563104768. Throughput: 0: 10294.3. Samples: 390764220. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:33,979][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:34,548][626795] Updated weights for policy 0, policy_version 312882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:36,446][626795] Updated weights for policy 0, policy_version 312892 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:38,280][626795] Updated weights for policy 0, policy_version 312902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:38,975][24592] Fps is (10 sec: 42600.2, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 2563317760. Throughput: 0: 10282.2. Samples: 390827550. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:38,977][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:40,223][626795] Updated weights for policy 0, policy_version 312912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:42,193][626795] Updated weights for policy 0, policy_version 312922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:43,975][24592] Fps is (10 sec: 42603.8, 60 sec: 41506.2, 300 sec: 41487.6). Total num frames: 2563530752. Throughput: 0: 10417.5. Samples: 390858798. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:43,977][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:44,191][626795] Updated weights for policy 0, policy_version 312932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:46,109][626795] Updated weights for policy 0, policy_version 312942 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:47,968][626795] Updated weights for policy 0, policy_version 312952 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:48,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41369.8, 300 sec: 41487.6). Total num frames: 2563735552. Throughput: 0: 10439.9. Samples: 390923058. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:48,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:49,954][626795] Updated weights for policy 0, policy_version 312962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:51,868][626795] Updated weights for policy 0, policy_version 312972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:53,750][626795] Updated weights for policy 0, policy_version 312982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.2, 300 sec: 41543.2). Total num frames: 2563956736. Throughput: 0: 10461.9. Samples: 390986514. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:53,977][24592] Avg episode reward: [(0, '4.860')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:55,812][626795] Updated weights for policy 0, policy_version 312992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:57,720][626795] Updated weights for policy 0, policy_version 313002 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:58,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41779.3, 300 sec: 41515.4). Total num frames: 2564161536. Throughput: 0: 10447.9. Samples: 391017318. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:39:58,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:39:59,654][626795] Updated weights for policy 0, policy_version 313012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:02,140][626795] Updated weights for policy 0, policy_version 313022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:03,975][24592] Fps is (10 sec: 38501.8, 60 sec: 41369.5, 300 sec: 41487.6). Total num frames: 2564341760. Throughput: 0: 10312.7. Samples: 391074936. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:03,976][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:04,163][626795] Updated weights for policy 0, policy_version 313032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:06,122][626795] Updated weights for policy 0, policy_version 313042 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:07,982][626795] Updated weights for policy 0, policy_version 313052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:08,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2564562944. Throughput: 0: 10296.9. Samples: 391138386. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:08,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:10,012][626795] Updated weights for policy 0, policy_version 313062 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:11,968][626795] Updated weights for policy 0, policy_version 313072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:13,807][626795] Updated weights for policy 0, policy_version 313082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:13,976][24592] Fps is (10 sec: 42597.7, 60 sec: 41369.4, 300 sec: 41487.6). Total num frames: 2564767744. Throughput: 0: 10296.2. Samples: 391169844. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:13,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:15,713][626795] Updated weights for policy 0, policy_version 313092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:17,810][626795] Updated weights for policy 0, policy_version 313102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:18,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41642.7, 300 sec: 41515.4). Total num frames: 2564988928. Throughput: 0: 10432.3. Samples: 391233660. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:18,976][24592] Avg episode reward: [(0, '4.990')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:19,592][626795] Updated weights for policy 0, policy_version 313112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:21,542][626795] Updated weights for policy 0, policy_version 313122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:23,445][626795] Updated weights for policy 0, policy_version 313132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:23,975][24592] Fps is (10 sec: 41780.4, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2565185536. Throughput: 0: 10432.3. Samples: 391297002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:23,976][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:25,486][626795] Updated weights for policy 0, policy_version 313142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:27,347][626795] Updated weights for policy 0, policy_version 313152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:28,976][24592] Fps is (10 sec: 41779.1, 60 sec: 41916.0, 300 sec: 41487.6). Total num frames: 2565406720. Throughput: 0: 10420.9. Samples: 391327740. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:28,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:29,484][626795] Updated weights for policy 0, policy_version 313162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:31,232][626795] Updated weights for policy 0, policy_version 313172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:33,275][626795] Updated weights for policy 0, policy_version 313182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:34,435][24592] Fps is (10 sec: 39945.0, 60 sec: 41327.3, 300 sec: 41451.8). Total num frames: 2565603328. Throughput: 0: 10287.9. Samples: 391390740. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:34,436][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:35,791][626795] Updated weights for policy 0, policy_version 313192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:37,755][626795] Updated weights for policy 0, policy_version 313202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:38,977][24592] Fps is (10 sec: 39314.5, 60 sec: 41368.3, 300 sec: 41459.6). Total num frames: 2565799936. Throughput: 0: 10256.6. Samples: 391448082. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:38,980][24592] Avg episode reward: [(0, '4.915')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:39,755][626795] Updated weights for policy 0, policy_version 313212 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:41,596][626795] Updated weights for policy 0, policy_version 313222 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:43,535][626795] Updated weights for policy 0, policy_version 313232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:43,976][24592] Fps is (10 sec: 42929.6, 60 sec: 41369.3, 300 sec: 41487.6). Total num frames: 2566012928. Throughput: 0: 10270.6. Samples: 391479498. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:43,978][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:45,587][626795] Updated weights for policy 0, policy_version 313242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:47,540][626795] Updated weights for policy 0, policy_version 313252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:48,975][24592] Fps is (10 sec: 41787.2, 60 sec: 41369.5, 300 sec: 41459.8). Total num frames: 2566217728. Throughput: 0: 10387.8. Samples: 391542384. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:48,977][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:49,395][626795] Updated weights for policy 0, policy_version 313262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:51,433][626795] Updated weights for policy 0, policy_version 313272 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:53,383][626795] Updated weights for policy 0, policy_version 313282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:53,976][24592] Fps is (10 sec: 41779.7, 60 sec: 41232.9, 300 sec: 41487.6). Total num frames: 2566430720. Throughput: 0: 10388.3. Samples: 391605864. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:53,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:55,338][626795] Updated weights for policy 0, policy_version 313292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:57,278][626795] Updated weights for policy 0, policy_version 313302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.1, 300 sec: 41459.8). Total num frames: 2566635520. Throughput: 0: 10372.6. Samples: 391636608. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:40:58,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:40:59,297][626795] Updated weights for policy 0, policy_version 313312 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:41:01,193][626795] Updated weights for policy 0, policy_version 313322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:41:03,219][626795] Updated weights for policy 0, policy_version 313332 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:41:03,975][24592] Fps is (10 sec: 40960.9, 60 sec: 41642.7, 300 sec: 41432.1). Total num frames: 2566840320. Throughput: 0: 10342.6. Samples: 391699074. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:41:03,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:41:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000313335_2566840320.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:41:04,146][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000312122_2556903424.pth\u001b[0m\n",
+ "[... output truncated: training continues in the same pattern, with a new checkpoint saved and the oldest one removed every two minutes ...]\n",
+ "\u001b[36m[2025-04-17 18:42:32,519][626772] Signal inference workers to stop experience collection... (5100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:42:32,526][626772] Signal inference workers to resume experience collection... (5100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:42:32,534][626795] InferenceWorker_p0-w0: stopping experience collection (5100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:42:32,538][626795] InferenceWorker_p0-w0: resuming experience collection (5100 times)\u001b[0m\n",
+ "[... output truncated: over the ~8 minutes shown, total num frames advances from 2,565,406,720 to 2,584,649,728 (policy_version 313162 -> 315502); the inference worker is periodically signalled to stop/resume experience collection (cumulative counts 5100 and 5150 fall within this window), and further checkpoints are saved at 18:43:03, 18:45:03 and 18:47:03 ...]\n",
+ "\u001b[36m[2025-04-17 18:48:08,975][24592] Fps is (10 sec: 43418.3, 60 sec: 42325.4, 300 sec: 42098.6). Total num frames: 2584649728. Throughput: 0: 10627.2. Samples: 396160860. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:08,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:09,449][626795] Updated weights for policy 0, policy_version 315512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:11,285][626795] Updated weights for policy 0, policy_version 315522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:13,227][626795] Updated weights for policy 0, policy_version 315532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:13,975][24592] Fps is (10 sec: 44237.0, 60 sec: 42325.6, 300 sec: 42126.4). Total num frames: 2584870912. Throughput: 0: 10652.7. Samples: 396193626. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:13,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:14,971][626795] Updated weights for policy 0, policy_version 315542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:16,956][626795] Updated weights for policy 0, policy_version 315552 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:18,812][626795] Updated weights for policy 0, policy_version 315562 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:18,975][24592] Fps is (10 sec: 44236.8, 60 sec: 42462.2, 300 sec: 42181.9). Total num frames: 2585092096. Throughput: 0: 10666.6. Samples: 396258528. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:18,976][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:20,869][626795] Updated weights for policy 0, policy_version 315572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:22,596][626795] Updated weights for policy 0, policy_version 315582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:23,976][24592] Fps is (10 sec: 43415.8, 60 sec: 42871.2, 300 sec: 42181.8). Total num frames: 2585305088. Throughput: 0: 10653.9. Samples: 396323556. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:23,978][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:24,547][626795] Updated weights for policy 0, policy_version 315592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:26,415][626795] Updated weights for policy 0, policy_version 315602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:28,306][626795] Updated weights for policy 0, policy_version 315612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:29,417][24592] Fps is (10 sec: 40012.4, 60 sec: 42422.8, 300 sec: 42202.0). Total num frames: 2585509888. Throughput: 0: 10570.5. Samples: 396356136. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:29,418][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:30,949][626795] Updated weights for policy 0, policy_version 315622 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:32,739][626795] Updated weights for policy 0, policy_version 315632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:33,975][24592] Fps is (10 sec: 39323.0, 60 sec: 42325.5, 300 sec: 42209.6). Total num frames: 2585698304. Throughput: 0: 10479.1. Samples: 396413334. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:33,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:34,889][626795] Updated weights for policy 0, policy_version 315642 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:36,603][626795] Updated weights for policy 0, policy_version 315652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:38,627][626795] Updated weights for policy 0, policy_version 315662 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:38,975][24592] Fps is (10 sec: 42852.0, 60 sec: 42325.6, 300 sec: 42209.6). Total num frames: 2585919488. Throughput: 0: 10469.0. Samples: 396477810. Policy #0 lag: (min: 1.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:38,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:40,443][626795] Updated weights for policy 0, policy_version 315672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:42,378][626795] Updated weights for policy 0, policy_version 315682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:43,975][24592] Fps is (10 sec: 44236.4, 60 sec: 42325.4, 300 sec: 42265.1). Total num frames: 2586140672. Throughput: 0: 10633.9. Samples: 396510054. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:43,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:44,227][626795] Updated weights for policy 0, policy_version 315692 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:46,089][626795] Updated weights for policy 0, policy_version 315702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:47,894][626795] Updated weights for policy 0, policy_version 315712 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:48,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42188.8, 300 sec: 42293.0). Total num frames: 2586353664. Throughput: 0: 10682.8. Samples: 396575724. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:48,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:49,835][626795] Updated weights for policy 0, policy_version 315722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:51,688][626795] Updated weights for policy 0, policy_version 315732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:53,635][626795] Updated weights for policy 0, policy_version 315742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:53,976][24592] Fps is (10 sec: 43415.1, 60 sec: 42325.1, 300 sec: 42320.6). Total num frames: 2586574848. Throughput: 0: 10674.8. Samples: 396641232. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:53,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:55,466][626795] Updated weights for policy 0, policy_version 315752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:57,430][626795] Updated weights for policy 0, policy_version 315762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:58,978][24592] Fps is (10 sec: 43407.9, 60 sec: 42870.0, 300 sec: 42292.6). Total num frames: 2586787840. Throughput: 0: 10657.1. Samples: 396673218. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:48:58,978][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:48:59,339][626795] Updated weights for policy 0, policy_version 315772 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:01,225][626795] Updated weights for policy 0, policy_version 315782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:03,684][626795] Updated weights for policy 0, policy_version 315792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:03,975][24592] Fps is (10 sec: 39324.2, 60 sec: 42325.3, 300 sec: 42237.4). Total num frames: 2586968064. Throughput: 0: 10490.8. Samples: 396730614. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:03,977][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000315792_2586968064.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:04,120][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000314556_2576842752.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:05,689][626795] Updated weights for policy 0, policy_version 315802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:07,613][626795] Updated weights for policy 0, policy_version 315812 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:08,975][24592] Fps is (10 sec: 39329.9, 60 sec: 42188.7, 300 sec: 42237.4). Total num frames: 2587181056. Throughput: 0: 10457.1. Samples: 396794124. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:08,977][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:09,686][626795] Updated weights for policy 0, policy_version 315822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:11,419][626795] Updated weights for policy 0, policy_version 315832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:13,287][626795] Updated weights for policy 0, policy_version 315842 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:13,975][24592] Fps is (10 sec: 43417.9, 60 sec: 42188.8, 300 sec: 42237.4). Total num frames: 2587402240. Throughput: 0: 10572.3. Samples: 396827220. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:13,978][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:15,167][626795] Updated weights for policy 0, policy_version 315852 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:17,070][626795] Updated weights for policy 0, policy_version 315862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:18,845][626795] Updated weights for policy 0, policy_version 315872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:18,975][24592] Fps is (10 sec: 45056.3, 60 sec: 42325.3, 300 sec: 42293.0). Total num frames: 2587631616. Throughput: 0: 10661.3. Samples: 396893094. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:18,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:20,891][626795] Updated weights for policy 0, policy_version 315882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:22,626][626795] Updated weights for policy 0, policy_version 315892 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:23,975][24592] Fps is (10 sec: 44236.4, 60 sec: 42325.6, 300 sec: 42292.9). Total num frames: 2587844608. Throughput: 0: 10690.9. Samples: 396958902. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:23,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:24,532][626795] Updated weights for policy 0, policy_version 315902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:26,466][626795] Updated weights for policy 0, policy_version 315912 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:28,380][626795] Updated weights for policy 0, policy_version 315922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:28,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42776.6, 300 sec: 42292.9). Total num frames: 2588057600. Throughput: 0: 10689.8. Samples: 396991092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:28,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:30,242][626795] Updated weights for policy 0, policy_version 315932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:32,221][626795] Updated weights for policy 0, policy_version 315942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:33,963][626795] Updated weights for policy 0, policy_version 315952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:33,976][24592] Fps is (10 sec: 43416.3, 60 sec: 43007.8, 300 sec: 42394.6). Total num frames: 2588278784. Throughput: 0: 10651.4. Samples: 397055040. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:33,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:36,614][626795] Updated weights for policy 0, policy_version 315962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:38,583][626795] Updated weights for policy 0, policy_version 315972 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:38,975][24592] Fps is (10 sec: 40140.7, 60 sec: 42325.3, 300 sec: 42293.1). Total num frames: 2588459008. Throughput: 0: 10477.0. Samples: 397112688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:38,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:41,077][626795] Updated weights for policy 0, policy_version 315982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:42,842][626795] Updated weights for policy 0, policy_version 315992 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:43,976][24592] Fps is (10 sec: 36864.8, 60 sec: 41779.2, 300 sec: 42209.6). Total num frames: 2588647424. Throughput: 0: 10348.3. Samples: 397138872. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:43,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:44,899][626795] Updated weights for policy 0, policy_version 316002 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:46,770][626795] Updated weights for policy 0, policy_version 316012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:48,593][626795] Updated weights for policy 0, policy_version 316022 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:48,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41779.2, 300 sec: 42181.9). Total num frames: 2588860416. Throughput: 0: 10499.1. Samples: 397203072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:48,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:50,646][626795] Updated weights for policy 0, policy_version 316032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:52,468][626795] Updated weights for policy 0, policy_version 316042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:53,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41643.1, 300 sec: 42181.9). Total num frames: 2589073408. Throughput: 0: 10520.3. Samples: 397267536. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:53,978][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:54,360][626795] Updated weights for policy 0, policy_version 316052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:56,589][626795] Updated weights for policy 0, policy_version 316062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:49:58,462][626795] Updated weights for policy 0, policy_version 316072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:58,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41507.6, 300 sec: 42154.1). Total num frames: 2589278208. Throughput: 0: 10447.7. Samples: 397297368. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:49:58,977][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:00,622][626795] Updated weights for policy 0, policy_version 316082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:03,181][626795] Updated weights for policy 0, policy_version 316092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:03,975][24592] Fps is (10 sec: 37683.2, 60 sec: 41369.6, 300 sec: 42015.3). Total num frames: 2589450240. Throughput: 0: 10223.7. Samples: 397353162. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:03,977][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:05,355][626795] Updated weights for policy 0, policy_version 316102 (0.0044)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:07,478][626795] Updated weights for policy 0, policy_version 316112 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:08,975][24592] Fps is (10 sec: 33587.3, 60 sec: 40550.4, 300 sec: 41931.9). Total num frames: 2589614080. Throughput: 0: 9882.1. Samples: 397403598. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:08,976][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:10,287][626795] Updated weights for policy 0, policy_version 316122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:12,755][626795] Updated weights for policy 0, policy_version 316132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:13,976][24592] Fps is (10 sec: 34402.7, 60 sec: 39867.0, 300 sec: 41792.9). Total num frames: 2589794304. Throughput: 0: 9773.5. Samples: 397430910. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:13,979][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:15,034][626795] Updated weights for policy 0, policy_version 316142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:17,143][626795] Updated weights for policy 0, policy_version 316152 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:18,976][24592] Fps is (10 sec: 37681.8, 60 sec: 39321.3, 300 sec: 41737.5). Total num frames: 2589990912. Throughput: 0: 9560.1. Samples: 397485246. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:18,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:19,038][626795] Updated weights for policy 0, policy_version 316162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:20,983][626795] Updated weights for policy 0, policy_version 316172 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:22,948][626795] Updated weights for policy 0, policy_version 316182 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:23,975][24592] Fps is (10 sec: 40964.6, 60 sec: 39321.6, 300 sec: 41737.5). Total num frames: 2590203904. Throughput: 0: 9685.7. Samples: 397548546. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:23,977][24592] Avg episode reward: [(0, '5.003')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:24,854][626795] Updated weights for policy 0, policy_version 316192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:26,754][626795] Updated weights for policy 0, policy_version 316202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:28,653][626795] Updated weights for policy 0, policy_version 316212 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:28,975][24592] Fps is (10 sec: 43419.5, 60 sec: 39458.1, 300 sec: 41737.6). Total num frames: 2590425088. Throughput: 0: 9827.8. Samples: 397581120. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:28,976][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:30,591][626795] Updated weights for policy 0, policy_version 316222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:32,390][626795] Updated weights for policy 0, policy_version 316232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:33,975][24592] Fps is (10 sec: 42598.6, 60 sec: 39185.3, 300 sec: 41709.8). Total num frames: 2590629888. Throughput: 0: 9836.7. Samples: 397645722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:33,977][24592] Avg episode reward: [(0, '4.765')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:34,434][626795] Updated weights for policy 0, policy_version 316242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:36,370][626795] Updated weights for policy 0, policy_version 316252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:38,258][626795] Updated weights for policy 0, policy_version 316262 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:38,975][24592] Fps is (10 sec: 41778.8, 60 sec: 39731.2, 300 sec: 41709.8). Total num frames: 2590842880. Throughput: 0: 9829.7. Samples: 397709874. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:38,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:40,088][626795] Updated weights for policy 0, policy_version 316272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:42,801][626795] Updated weights for policy 0, policy_version 316282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:43,976][24592] Fps is (10 sec: 39319.2, 60 sec: 39594.4, 300 sec: 41681.9). Total num frames: 2591023104. Throughput: 0: 9705.0. Samples: 397734096. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:43,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:44,790][626795] Updated weights for policy 0, policy_version 316292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:46,618][626795] Updated weights for policy 0, policy_version 316302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:48,516][626795] Updated weights for policy 0, policy_version 316312 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:48,976][24592] Fps is (10 sec: 39318.0, 60 sec: 39594.0, 300 sec: 41709.7). Total num frames: 2591236096. Throughput: 0: 9886.3. Samples: 397798056. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:48,978][24592] Avg episode reward: [(0, '4.956')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:50,562][626795] Updated weights for policy 0, policy_version 316322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:52,359][626795] Updated weights for policy 0, policy_version 316332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:53,978][24592] Fps is (10 sec: 43409.4, 60 sec: 39729.6, 300 sec: 41709.4). Total num frames: 2591457280. Throughput: 0: 10197.3. Samples: 397862502. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:53,979][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:54,379][626795] Updated weights for policy 0, policy_version 316342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:56,266][626795] Updated weights for policy 0, policy_version 316352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:58,130][626795] Updated weights for policy 0, policy_version 316362 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:58,975][24592] Fps is (10 sec: 43422.0, 60 sec: 39867.8, 300 sec: 41710.0). Total num frames: 2591670272. Throughput: 0: 10291.6. Samples: 397894020. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:50:58,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:50:59,937][626795] Updated weights for policy 0, policy_version 316372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:01,917][626795] Updated weights for policy 0, policy_version 316382 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:03,777][626795] Updated weights for policy 0, policy_version 316392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:03,976][24592] Fps is (10 sec: 43426.4, 60 sec: 40686.7, 300 sec: 41709.7). Total num frames: 2591891456. Throughput: 0: 10537.6. Samples: 397959438. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:03,978][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000316393_2591891456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:04,112][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000315173_2581897216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:05,821][626795] Updated weights for policy 0, policy_version 316402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:07,592][626795] Updated weights for policy 0, policy_version 316412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:08,976][24592] Fps is (10 sec: 43416.8, 60 sec: 41506.0, 300 sec: 41709.8). Total num frames: 2592104448. Throughput: 0: 10536.6. Samples: 398022696. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:08,977][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:09,598][626795] Updated weights for policy 0, policy_version 316422 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:11,558][626795] Updated weights for policy 0, policy_version 316432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:13,977][24592] Fps is (10 sec: 39318.8, 60 sec: 41506.1, 300 sec: 41681.9). Total num frames: 2592284672. Throughput: 0: 10530.1. Samples: 398054988. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:13,978][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:14,153][626795] Updated weights for policy 0, policy_version 316442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:15,933][626795] Updated weights for policy 0, policy_version 316452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:17,957][626795] Updated weights for policy 0, policy_version 316462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:18,976][24592] Fps is (10 sec: 39320.2, 60 sec: 41779.1, 300 sec: 41709.7). Total num frames: 2592497664. Throughput: 0: 10359.9. Samples: 398111922. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:18,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:19,877][626795] Updated weights for policy 0, policy_version 316472 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:21,747][626795] Updated weights for policy 0, policy_version 316482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:23,574][626795] Updated weights for policy 0, policy_version 316492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:23,975][24592] Fps is (10 sec: 43422.6, 60 sec: 41915.7, 300 sec: 41709.8). Total num frames: 2592718848. Throughput: 0: 10386.4. Samples: 398177262. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:23,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:25,487][626795] Updated weights for policy 0, policy_version 316502 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:27,352][626795] Updated weights for policy 0, policy_version 316512 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:28,975][24592] Fps is (10 sec: 43419.8, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2592931840. Throughput: 0: 10563.1. Samples: 398209428. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:28,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:29,372][626795] Updated weights for policy 0, policy_version 316522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:31,136][626795] Updated weights for policy 0, policy_version 316532 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:33,049][626795] Updated weights for policy 0, policy_version 316542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:33,976][24592] Fps is (10 sec: 42597.2, 60 sec: 41915.5, 300 sec: 41682.0). Total num frames: 2593144832. Throughput: 0: 10585.2. Samples: 398274384. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:33,977][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:35,102][626795] Updated weights for policy 0, policy_version 316552 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:36,878][626795] Updated weights for policy 0, policy_version 316562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:38,730][626795] Updated weights for policy 0, policy_version 316572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:38,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42052.3, 300 sec: 41709.8). Total num frames: 2593366016. Throughput: 0: 10580.3. Samples: 398338590. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:38,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:40,754][626795] Updated weights for policy 0, policy_version 316582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:42,596][626795] Updated weights for policy 0, policy_version 316592 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:43,976][24592] Fps is (10 sec: 43416.8, 60 sec: 42598.5, 300 sec: 41681.9). Total num frames: 2593579008. Throughput: 0: 10596.8. Samples: 398370882. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:43,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:44,479][626795] Updated weights for policy 0, policy_version 316602 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:47,003][626795] Updated weights for policy 0, policy_version 316612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:48,925][626795] Updated weights for policy 0, policy_version 316622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:48,976][24592] Fps is (10 sec: 40139.8, 60 sec: 42189.3, 300 sec: 41709.8). Total num frames: 2593767424. Throughput: 0: 10420.8. Samples: 398428374. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:48,978][24592] Avg episode reward: [(0, '4.790')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:50,825][626795] Updated weights for policy 0, policy_version 316632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:52,761][626795] Updated weights for policy 0, policy_version 316642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:53,976][24592] Fps is (10 sec: 40142.0, 60 sec: 42053.9, 300 sec: 41709.8). Total num frames: 2593980416. Throughput: 0: 10463.6. Samples: 398493558. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:53,976][24592] Avg episode reward: [(0, '4.338')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:54,559][626795] Updated weights for policy 0, policy_version 316652 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:56,527][626795] Updated weights for policy 0, policy_version 316662 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:51:58,362][626795] Updated weights for policy 0, policy_version 316672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:58,976][24592] Fps is (10 sec: 43417.1, 60 sec: 42188.5, 300 sec: 41709.7). Total num frames: 2594201600. Throughput: 0: 10465.9. Samples: 398525946. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:51:58,978][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:00,277][626795] Updated weights for policy 0, policy_version 316682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:02,051][626795] Updated weights for policy 0, policy_version 316692 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:03,975][24592] Fps is (10 sec: 43418.2, 60 sec: 42052.6, 300 sec: 41709.8). Total num frames: 2594414592. Throughput: 0: 10664.7. Samples: 398591826. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:03,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:04,142][626795] Updated weights for policy 0, policy_version 316702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:05,878][626795] Updated weights for policy 0, policy_version 316712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:07,771][626795] Updated weights for policy 0, policy_version 316722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:08,975][24592] Fps is (10 sec: 43419.2, 60 sec: 42188.9, 300 sec: 41709.8). Total num frames: 2594635776. Throughput: 0: 10643.2. Samples: 398656206. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:08,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:09,712][626795] Updated weights for policy 0, policy_version 316732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:11,699][626795] Updated weights for policy 0, policy_version 316742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:13,511][626795] Updated weights for policy 0, policy_version 316752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:13,975][24592] Fps is (10 sec: 43417.1, 60 sec: 42735.7, 300 sec: 41709.8). Total num frames: 2594848768. Throughput: 0: 10645.3. Samples: 398688468. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:13,977][24592] Avg episode reward: [(0, '4.957')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:15,417][626795] Updated weights for policy 0, policy_version 316762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:17,355][626795] Updated weights for policy 0, policy_version 316772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:19,590][24592] Fps is (10 sec: 40133.3, 60 sec: 42302.3, 300 sec: 41706.2). Total num frames: 2595061760. Throughput: 0: 10495.9. Samples: 398753142. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:19,592][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:20,000][626795] Updated weights for policy 0, policy_version 316782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:21,858][626795] Updated weights for policy 0, policy_version 316792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:23,684][626795] Updated weights for policy 0, policy_version 316802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:23,975][24592] Fps is (10 sec: 40140.9, 60 sec: 42188.7, 300 sec: 41709.8). Total num frames: 2595250176. Throughput: 0: 10489.3. Samples: 398810610. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:23,976][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:25,622][626795] Updated weights for policy 0, policy_version 316812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:27,507][626795] Updated weights for policy 0, policy_version 316822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:28,975][24592] Fps is (10 sec: 43640.6, 60 sec: 42325.4, 300 sec: 41737.6). Total num frames: 2595471360. Throughput: 0: 10478.9. Samples: 398842428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:28,976][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:29,448][626795] Updated weights for policy 0, policy_version 316832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:31,296][626795] Updated weights for policy 0, policy_version 316842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:33,075][626795] Updated weights for policy 0, policy_version 316852 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:33,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42325.5, 300 sec: 41709.8). Total num frames: 2595684352. Throughput: 0: 10669.1. Samples: 398908482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:33,976][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:35,063][626795] Updated weights for policy 0, policy_version 316862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:37,000][626795] Updated weights for policy 0, policy_version 316872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:38,910][626795] Updated weights for policy 0, policy_version 316882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42188.8, 300 sec: 41682.0). Total num frames: 2595897344. Throughput: 0: 10650.4. Samples: 398972826. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:38,977][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:40,772][626795] Updated weights for policy 0, policy_version 316892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:42,721][626795] Updated weights for policy 0, policy_version 316902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42189.1, 300 sec: 41654.2). Total num frames: 2596110336. Throughput: 0: 10631.7. Samples: 399004368. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:43,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:44,638][626795] Updated weights for policy 0, policy_version 316912 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:46,487][626795] Updated weights for policy 0, policy_version 316922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:48,430][626795] Updated weights for policy 0, policy_version 316932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:48,976][24592] Fps is (10 sec: 42597.3, 60 sec: 42598.4, 300 sec: 41654.2). Total num frames: 2596323328. Throughput: 0: 10597.3. Samples: 399068706. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:48,977][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:50,450][626795] Updated weights for policy 0, policy_version 316942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:53,027][626795] Updated weights for policy 0, policy_version 316952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:53,975][24592] Fps is (10 sec: 40140.9, 60 sec: 42188.9, 300 sec: 41682.0). Total num frames: 2596511744. Throughput: 0: 10431.5. Samples: 399125622. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:53,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:54,868][626795] Updated weights for policy 0, policy_version 316962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:56,816][626795] Updated weights for policy 0, policy_version 316972 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:52:58,747][626795] Updated weights for policy 0, policy_version 316982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:58,975][24592] Fps is (10 sec: 40141.5, 60 sec: 42052.5, 300 sec: 41682.0). Total num frames: 2596724736. Throughput: 0: 10430.5. Samples: 399157842. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:52:58,977][24592] Avg episode reward: [(0, '4.901')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:00,580][626795] Updated weights for policy 0, policy_version 316992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:02,433][626795] Updated weights for policy 0, policy_version 317002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:03,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42188.8, 300 sec: 41682.0). Total num frames: 2596945920. Throughput: 0: 10583.0. Samples: 399222876. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:03,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000317010_2596945920.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:04,121][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000315792_2586968064.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:04,462][626795] Updated weights for policy 0, policy_version 317012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:06,437][626795] Updated weights for policy 0, policy_version 317022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:08,312][626795] Updated weights for policy 0, policy_version 317032 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:08,976][24592] Fps is (10 sec: 42597.9, 60 sec: 41915.6, 300 sec: 41626.4). Total num frames: 2597150720. Throughput: 0: 10560.9. Samples: 399285852. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:08,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:10,182][626795] Updated weights for policy 0, policy_version 317042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:12,125][626795] Updated weights for policy 0, policy_version 317052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.8, 300 sec: 41598.7). Total num frames: 2597363712. Throughput: 0: 10563.2. Samples: 399317772. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:13,976][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:14,052][626795] Updated weights for policy 0, policy_version 317062 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:16,010][626795] Updated weights for policy 0, policy_version 317072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:18,020][626795] Updated weights for policy 0, policy_version 317082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:18,977][24592] Fps is (10 sec: 42592.5, 60 sec: 42348.2, 300 sec: 41598.5). Total num frames: 2597576704. Throughput: 0: 10517.1. Samples: 399381768. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:18,978][24592] Avg episode reward: [(0, '4.891')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:19,930][626795] Updated weights for policy 0, policy_version 317092 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:21,752][626795] Updated weights for policy 0, policy_version 317102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:23,711][626795] Updated weights for policy 0, policy_version 317112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:23,977][24592] Fps is (10 sec: 42593.1, 60 sec: 42324.5, 300 sec: 41688.7). Total num frames: 2597789696. Throughput: 0: 10499.2. Samples: 399445302. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:23,978][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:26,384][626795] Updated weights for policy 0, policy_version 317122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:28,344][626795] Updated weights for policy 0, policy_version 317132 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:28,975][24592] Fps is (10 sec: 39327.4, 60 sec: 41642.6, 300 sec: 41598.7). Total num frames: 2597969920. Throughput: 0: 10338.7. Samples: 399469608. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:28,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:30,222][626795] Updated weights for policy 0, policy_version 317142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:32,164][626795] Updated weights for policy 0, policy_version 317152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:33,975][24592] Fps is (10 sec: 39326.5, 60 sec: 41642.7, 300 sec: 41570.9). Total num frames: 2598182912. Throughput: 0: 10326.9. Samples: 399533412. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:33,977][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:34,014][626795] Updated weights for policy 0, policy_version 317162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:35,953][626795] Updated weights for policy 0, policy_version 317172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:37,875][626795] Updated weights for policy 0, policy_version 317182 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:38,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41642.7, 300 sec: 41543.2). Total num frames: 2598395904. Throughput: 0: 10496.3. Samples: 399597954. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:38,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:39,797][626795] Updated weights for policy 0, policy_version 317192 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:41,792][626795] Updated weights for policy 0, policy_version 317202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:43,681][626795] Updated weights for policy 0, policy_version 317212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:43,976][24592] Fps is (10 sec: 42597.7, 60 sec: 41642.6, 300 sec: 41543.1). Total num frames: 2598608896. Throughput: 0: 10478.0. Samples: 399629352. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:43,976][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:45,664][626795] Updated weights for policy 0, policy_version 317222 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:47,587][626795] Updated weights for policy 0, policy_version 317232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41642.8, 300 sec: 41515.5). Total num frames: 2598821888. Throughput: 0: 10449.9. Samples: 399693120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:48,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:49,497][626795] Updated weights for policy 0, policy_version 317242 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:51,471][626795] Updated weights for policy 0, policy_version 317252 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:53,281][626795] Updated weights for policy 0, policy_version 317262 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:53,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42052.2, 300 sec: 41515.7). Total num frames: 2599034880. Throughput: 0: 10458.6. Samples: 399756486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:53,976][24592] Avg episode reward: [(0, '5.008')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:55,261][626795] Updated weights for policy 0, policy_version 317272 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:57,260][626795] Updated weights for policy 0, policy_version 317282 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:58,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41506.2, 300 sec: 41515.4). Total num frames: 2599215104. Throughput: 0: 10444.0. Samples: 399787752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:53:58,977][24592] Avg episode reward: [(0, '4.978')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:53:59,870][626795] Updated weights for policy 0, policy_version 317292 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:01,765][626795] Updated weights for policy 0, policy_version 317302 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:03,613][626795] Updated weights for policy 0, policy_version 317312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:03,976][24592] Fps is (10 sec: 39320.1, 60 sec: 41369.3, 300 sec: 41515.3). Total num frames: 2599428096. Throughput: 0: 10284.2. Samples: 399844548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:03,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:05,741][626795] Updated weights for policy 0, policy_version 317322 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:07,556][626795] Updated weights for policy 0, policy_version 317332 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.2, 300 sec: 41487.6). Total num frames: 2599641088. Throughput: 0: 10285.2. Samples: 399908124. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:08,976][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:09,652][626795] Updated weights for policy 0, policy_version 317342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:11,528][626795] Updated weights for policy 0, policy_version 317352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:13,372][626795] Updated weights for policy 0, policy_version 317362 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:13,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41506.1, 300 sec: 41432.1). Total num frames: 2599854080. Throughput: 0: 10438.8. Samples: 399939354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:13,978][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:15,328][626795] Updated weights for policy 0, policy_version 317372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:17,388][626795] Updated weights for policy 0, policy_version 317382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:18,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41370.5, 300 sec: 41404.3). Total num frames: 2600058880. Throughput: 0: 10420.1. Samples: 400002318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:18,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:19,298][626795] Updated weights for policy 0, policy_version 317392 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:21,176][626795] Updated weights for policy 0, policy_version 317402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:23,155][626795] Updated weights for policy 0, policy_version 317412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:23,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41370.5, 300 sec: 41404.3). Total num frames: 2600271872. Throughput: 0: 10396.4. Samples: 400065792. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:23,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:25,096][626795] Updated weights for policy 0, policy_version 317422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:26,982][626795] Updated weights for policy 0, policy_version 317432 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:28,927][626795] Updated weights for policy 0, policy_version 317442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:28,975][24592] Fps is (10 sec: 42599.5, 60 sec: 41915.8, 300 sec: 41376.6). Total num frames: 2600484864. Throughput: 0: 10407.5. Samples: 400097688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:28,976][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:31,451][626795] Updated weights for policy 0, policy_version 317452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:33,362][626795] Updated weights for policy 0, policy_version 317462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:33,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41369.6, 300 sec: 41376.5). Total num frames: 2600665088. Throughput: 0: 10262.8. Samples: 400154946. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:33,979][24592] Avg episode reward: [(0, '4.497')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:35,411][626795] Updated weights for policy 0, policy_version 317472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:37,319][626795] Updated weights for policy 0, policy_version 317482 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:38,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2600878080. Throughput: 0: 10255.6. Samples: 400217988. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:38,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:39,291][626795] Updated weights for policy 0, policy_version 317492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:41,328][626795] Updated weights for policy 0, policy_version 317502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:43,126][626795] Updated weights for policy 0, policy_version 317512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:43,976][24592] Fps is (10 sec: 42594.8, 60 sec: 41369.1, 300 sec: 41459.7). Total num frames: 2601091072. Throughput: 0: 10242.6. Samples: 400248678. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:43,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:45,045][626795] Updated weights for policy 0, policy_version 317522 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:47,019][626795] Updated weights for policy 0, policy_version 317532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:48,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.0, 300 sec: 41432.1). Total num frames: 2601295872. Throughput: 0: 10400.5. Samples: 400312566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:48,977][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:49,025][626795] Updated weights for policy 0, policy_version 317542 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:50,878][626795] Updated weights for policy 0, policy_version 317552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:52,858][626795] Updated weights for policy 0, policy_version 317562 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:53,976][24592] Fps is (10 sec: 41781.5, 60 sec: 41232.9, 300 sec: 41459.8). Total num frames: 2601508864. Throughput: 0: 10397.7. Samples: 400376022. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:53,977][24592] Avg episode reward: [(0, '4.605')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:54,803][626795] Updated weights for policy 0, policy_version 317572 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:56,705][626795] Updated weights for policy 0, policy_version 317582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:54:58,686][626795] Updated weights for policy 0, policy_version 317592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:58,976][24592] Fps is (10 sec: 42597.7, 60 sec: 41779.0, 300 sec: 41598.7). Total num frames: 2601721856. Throughput: 0: 10400.5. Samples: 400407378. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:54:58,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:00,655][626795] Updated weights for policy 0, policy_version 317602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:02,508][626795] Updated weights for policy 0, policy_version 317612 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:03,975][24592] Fps is (10 sec: 40142.1, 60 sec: 41369.9, 300 sec: 41682.0). Total num frames: 2601910272. Throughput: 0: 10423.2. Samples: 400471362. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:03,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:04,007][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000317617_2601918464.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:04,122][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000316393_2591891456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:05,246][626795] Updated weights for policy 0, policy_version 317622 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:07,100][626795] Updated weights for policy 0, policy_version 317632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:08,975][24592] Fps is (10 sec: 38503.5, 60 sec: 41096.6, 300 sec: 41737.7). Total num frames: 2602106880. Throughput: 0: 10240.0. Samples: 400526592. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:08,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:09,120][626795] Updated weights for policy 0, policy_version 317642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:09,916][626772] Signal inference workers to stop experience collection... (5200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:09,925][626772] Signal inference workers to resume experience collection... (5200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:09,937][626795] InferenceWorker_p0-w0: stopping experience collection (5200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:09,940][626795] InferenceWorker_p0-w0: resuming experience collection (5200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:11,216][626795] Updated weights for policy 0, policy_version 317652 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:13,058][626795] Updated weights for policy 0, policy_version 317662 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:13,975][24592] Fps is (10 sec: 40959.8, 60 sec: 41096.5, 300 sec: 41793.1). Total num frames: 2602319872. Throughput: 0: 10207.4. Samples: 400557024. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:13,976][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:15,005][626795] Updated weights for policy 0, policy_version 317672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:16,957][626795] Updated weights for policy 0, policy_version 317682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:18,938][626795] Updated weights for policy 0, policy_version 317692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:18,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41233.2, 300 sec: 41793.1). Total num frames: 2602532864. Throughput: 0: 10332.5. Samples: 400619910. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:18,976][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:20,828][626795] Updated weights for policy 0, policy_version 317702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:22,829][626795] Updated weights for policy 0, policy_version 317712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:23,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41233.0, 300 sec: 41765.3). Total num frames: 2602745856. Throughput: 0: 10356.9. Samples: 400684050. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:23,976][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:24,655][626795] Updated weights for policy 0, policy_version 317722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:26,602][626795] Updated weights for policy 0, policy_version 317732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:28,577][626795] Updated weights for policy 0, policy_version 317742 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:28,976][24592] Fps is (10 sec: 41777.5, 60 sec: 41096.2, 300 sec: 41765.3). Total num frames: 2602950656. Throughput: 0: 10387.3. Samples: 400716102. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:28,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:30,714][626795] Updated weights for policy 0, policy_version 317752 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:32,585][626795] Updated weights for policy 0, policy_version 317762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:33,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41642.6, 300 sec: 41765.3). Total num frames: 2603163648. Throughput: 0: 10344.6. Samples: 400778076. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:33,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:34,506][626795] Updated weights for policy 0, policy_version 317772 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:37,115][626795] Updated weights for policy 0, policy_version 317782 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:38,925][626795] Updated weights for policy 0, policy_version 317792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:38,975][24592] Fps is (10 sec: 40142.5, 60 sec: 41233.1, 300 sec: 41793.2). Total num frames: 2603352064. Throughput: 0: 10197.4. Samples: 400834902. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:38,976][24592] Avg episode reward: [(0, '4.553')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:40,942][626795] Updated weights for policy 0, policy_version 317802 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:42,923][626795] Updated weights for policy 0, policy_version 317812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:43,975][24592] Fps is (10 sec: 39322.1, 60 sec: 41097.1, 300 sec: 41765.4). Total num frames: 2603556864. Throughput: 0: 10199.4. Samples: 400866348. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:43,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:44,839][626795] Updated weights for policy 0, policy_version 317822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:46,886][626795] Updated weights for policy 0, policy_version 317832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:48,627][626795] Updated weights for policy 0, policy_version 317842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41737.9). Total num frames: 2603769856. Throughput: 0: 10189.7. Samples: 400929900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:48,978][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:50,701][626795] Updated weights for policy 0, policy_version 317852 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:52,681][626795] Updated weights for policy 0, policy_version 317862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41096.7, 300 sec: 41709.8). Total num frames: 2603974656. Throughput: 0: 10347.6. Samples: 400992234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:53,977][24592] Avg episode reward: [(0, '5.202')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:54,649][626795] Updated weights for policy 0, policy_version 317872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:56,585][626795] Updated weights for policy 0, policy_version 317882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:55:58,586][626795] Updated weights for policy 0, policy_version 317892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.7, 300 sec: 41682.1). Total num frames: 2604187648. Throughput: 0: 10376.3. Samples: 401023956. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:55:58,976][24592] Avg episode reward: [(0, '4.455')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:00,463][626795] Updated weights for policy 0, policy_version 317902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:02,322][626795] Updated weights for policy 0, policy_version 317912 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:03,976][24592] Fps is (10 sec: 42597.9, 60 sec: 41506.0, 300 sec: 41682.0). Total num frames: 2604400640. Throughput: 0: 10384.9. Samples: 401087232. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:03,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:04,384][626795] Updated weights for policy 0, policy_version 317922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:06,283][626795] Updated weights for policy 0, policy_version 317932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:08,148][626795] Updated weights for policy 0, policy_version 317942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.2, 300 sec: 41793.2). Total num frames: 2604613632. Throughput: 0: 10358.5. Samples: 401150184. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:08,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:10,791][626795] Updated weights for policy 0, policy_version 317952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:12,792][626795] Updated weights for policy 0, policy_version 317962 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:13,976][24592] Fps is (10 sec: 39321.2, 60 sec: 41232.9, 300 sec: 41682.0). Total num frames: 2604793856. Throughput: 0: 10208.3. Samples: 401175474. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:13,978][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:14,617][626795] Updated weights for policy 0, policy_version 317972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:16,702][626795] Updated weights for policy 0, policy_version 317982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:18,748][626795] Updated weights for policy 0, policy_version 317992 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:18,976][24592] Fps is (10 sec: 38501.1, 60 sec: 41096.3, 300 sec: 41626.4). Total num frames: 2604998656. Throughput: 0: 10193.4. Samples: 401236782. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:18,978][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:20,808][626795] Updated weights for policy 0, policy_version 318002 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:22,926][626795] Updated weights for policy 0, policy_version 318012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:23,976][24592] Fps is (10 sec: 40139.6, 60 sec: 40823.1, 300 sec: 41570.9). Total num frames: 2605195264. Throughput: 0: 10258.7. Samples: 401296548. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:23,977][24592] Avg episode reward: [(0, '4.378')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:25,127][626795] Updated weights for policy 0, policy_version 318022 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:26,945][626795] Updated weights for policy 0, policy_version 318032 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:28,960][626795] Updated weights for policy 0, policy_version 318042 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:28,975][24592] Fps is (10 sec: 40142.3, 60 sec: 40823.8, 300 sec: 41543.2). Total num frames: 2605400064. Throughput: 0: 10224.6. Samples: 401326452. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:28,977][24592] Avg episode reward: [(0, '4.901')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:30,953][626795] Updated weights for policy 0, policy_version 318052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:32,882][626795] Updated weights for policy 0, policy_version 318062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:33,975][24592] Fps is (10 sec: 40962.0, 60 sec: 40687.1, 300 sec: 41487.6). Total num frames: 2605604864. Throughput: 0: 10198.0. Samples: 401388810. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:33,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:34,954][626795] Updated weights for policy 0, policy_version 318072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:36,805][626795] Updated weights for policy 0, policy_version 318082 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:38,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40823.5, 300 sec: 41432.1). Total num frames: 2605801472. Throughput: 0: 10183.7. Samples: 401450502. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:38,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:38,988][626795] Updated weights for policy 0, policy_version 318092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:41,140][626795] Updated weights for policy 0, policy_version 318102 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:43,958][626795] Updated weights for policy 0, policy_version 318112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:43,975][24592] Fps is (10 sec: 36864.0, 60 sec: 40277.4, 300 sec: 41376.6). Total num frames: 2605973504. Throughput: 0: 10074.1. Samples: 401477292. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:43,977][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:46,011][626795] Updated weights for policy 0, policy_version 318122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:47,982][626795] Updated weights for policy 0, policy_version 318132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:48,975][24592] Fps is (10 sec: 37682.9, 60 sec: 40140.8, 300 sec: 41348.8). Total num frames: 2606178304. Throughput: 0: 9880.4. Samples: 401531850. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:48,977][24592] Avg episode reward: [(0, '4.902')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:50,045][626795] Updated weights for policy 0, policy_version 318142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:51,976][626795] Updated weights for policy 0, policy_version 318152 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:53,907][626795] Updated weights for policy 0, policy_version 318162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:53,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40140.8, 300 sec: 41293.3). Total num frames: 2606383104. Throughput: 0: 9852.9. Samples: 401593566. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:53,977][24592] Avg episode reward: [(0, '4.392')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:55,945][626795] Updated weights for policy 0, policy_version 318172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:57,868][626795] Updated weights for policy 0, policy_version 318182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:58,976][24592] Fps is (10 sec: 40139.8, 60 sec: 39867.5, 300 sec: 41237.7). Total num frames: 2606579712. Throughput: 0: 9978.7. Samples: 401624514. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:56:58,980][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:56:59,974][626795] Updated weights for policy 0, policy_version 318192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:01,913][626795] Updated weights for policy 0, policy_version 318202 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:03,844][626795] Updated weights for policy 0, policy_version 318212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:03,976][24592] Fps is (10 sec: 40959.1, 60 sec: 39867.7, 300 sec: 41209.9). Total num frames: 2606792704. Throughput: 0: 9991.1. Samples: 401686380. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:03,976][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000318212_2606792704.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:04,117][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000317010_2596945920.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:05,927][626795] Updated weights for policy 0, policy_version 318222 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:07,922][626795] Updated weights for policy 0, policy_version 318232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:08,975][24592] Fps is (10 sec: 41780.2, 60 sec: 39731.2, 300 sec: 41182.2). Total num frames: 2606997504. Throughput: 0: 10027.3. Samples: 401747772. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:08,977][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:09,876][626795] Updated weights for policy 0, policy_version 318242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:11,930][626795] Updated weights for policy 0, policy_version 318252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:13,820][626795] Updated weights for policy 0, policy_version 318262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:13,975][24592] Fps is (10 sec: 40960.9, 60 sec: 40141.0, 300 sec: 41240.3). Total num frames: 2607202304. Throughput: 0: 10040.7. Samples: 401778282. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:13,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:16,441][626795] Updated weights for policy 0, policy_version 318272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:18,473][626795] Updated weights for policy 0, policy_version 318282 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:18,975][24592] Fps is (10 sec: 38502.7, 60 sec: 39731.4, 300 sec: 41126.6). Total num frames: 2607382528. Throughput: 0: 9887.5. Samples: 401833746. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:18,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:20,503][626795] Updated weights for policy 0, policy_version 318292 (0.0038)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:22,505][626795] Updated weights for policy 0, policy_version 318302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:23,976][24592] Fps is (10 sec: 38501.7, 60 sec: 39868.0, 300 sec: 41071.1). Total num frames: 2607587328. Throughput: 0: 9890.1. Samples: 401895558. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:23,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:24,428][626795] Updated weights for policy 0, policy_version 318312 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:26,458][626795] Updated weights for policy 0, policy_version 318322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:28,359][626795] Updated weights for policy 0, policy_version 318332 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:28,975][24592] Fps is (10 sec: 40959.6, 60 sec: 39867.7, 300 sec: 41043.3). Total num frames: 2607792128. Throughput: 0: 9984.1. Samples: 401926578. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:28,978][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:30,413][626795] Updated weights for policy 0, policy_version 318342 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:32,430][626795] Updated weights for policy 0, policy_version 318352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:33,975][24592] Fps is (10 sec: 40960.8, 60 sec: 39867.8, 300 sec: 41015.5). Total num frames: 2607996928. Throughput: 0: 10139.1. Samples: 401988108. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:33,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:34,300][626795] Updated weights for policy 0, policy_version 318362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:36,441][626795] Updated weights for policy 0, policy_version 318372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:38,375][626795] Updated weights for policy 0, policy_version 318382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:38,976][24592] Fps is (10 sec: 41777.6, 60 sec: 40140.5, 300 sec: 41015.5). Total num frames: 2608209920. Throughput: 0: 10143.0. Samples: 402050004. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:38,977][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:40,385][626795] Updated weights for policy 0, policy_version 318392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:42,355][626795] Updated weights for policy 0, policy_version 318402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:43,976][24592] Fps is (10 sec: 41777.4, 60 sec: 40686.7, 300 sec: 40987.7). Total num frames: 2608414720. Throughput: 0: 10123.0. Samples: 402080052. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:43,977][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:44,418][626795] Updated weights for policy 0, policy_version 318412 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:46,443][626795] Updated weights for policy 0, policy_version 318422 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:48,975][24592] Fps is (10 sec: 37685.1, 60 sec: 40140.9, 300 sec: 40932.2). Total num frames: 2608586752. Throughput: 0: 10111.8. Samples: 402141408. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:48,976][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:49,160][626795] Updated weights for policy 0, policy_version 318432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:50,940][626795] Updated weights for policy 0, policy_version 318442 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:53,033][626795] Updated weights for policy 0, policy_version 318452 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:53,976][24592] Fps is (10 sec: 37683.4, 60 sec: 40140.6, 300 sec: 40904.4). Total num frames: 2608791552. Throughput: 0: 9969.0. Samples: 402196380. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:53,978][24592] Avg episode reward: [(0, '4.350')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:55,049][626795] Updated weights for policy 0, policy_version 318462 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:57,101][626795] Updated weights for policy 0, policy_version 318472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:58,976][24592] Fps is (10 sec: 40958.5, 60 sec: 40277.3, 300 sec: 40848.9). Total num frames: 2608996352. Throughput: 0: 9969.9. Samples: 402226932. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:57:58,977][24592] Avg episode reward: [(0, '4.954')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:57:58,987][626795] Updated weights for policy 0, policy_version 318482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:01,099][626795] Updated weights for policy 0, policy_version 318492 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:03,051][626795] Updated weights for policy 0, policy_version 318502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:03,976][24592] Fps is (10 sec: 40960.2, 60 sec: 40140.8, 300 sec: 40848.9). Total num frames: 2609201152. Throughput: 0: 10112.7. Samples: 402288822. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:03,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:05,016][626795] Updated weights for policy 0, policy_version 318512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:07,029][626795] Updated weights for policy 0, policy_version 318522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:08,976][24592] Fps is (10 sec: 40960.6, 60 sec: 40140.7, 300 sec: 40821.1). Total num frames: 2609405952. Throughput: 0: 10109.9. Samples: 402350502. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:08,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:09,010][626795] Updated weights for policy 0, policy_version 318532 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:11,125][626795] Updated weights for policy 0, policy_version 318542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:13,085][626795] Updated weights for policy 0, policy_version 318552 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:13,975][24592] Fps is (10 sec: 40961.1, 60 sec: 40140.8, 300 sec: 40793.6). Total num frames: 2609610752. Throughput: 0: 10082.3. Samples: 402380280. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:13,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:15,094][626795] Updated weights for policy 0, policy_version 318562 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:17,050][626795] Updated weights for policy 0, policy_version 318572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:18,975][24592] Fps is (10 sec: 40960.8, 60 sec: 40550.4, 300 sec: 40765.8). Total num frames: 2609815552. Throughput: 0: 10085.2. Samples: 402441942. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:18,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:19,183][626795] Updated weights for policy 0, policy_version 318582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:21,608][626795] Updated weights for policy 0, policy_version 318592 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:23,619][626795] Updated weights for policy 0, policy_version 318602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:23,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40140.9, 300 sec: 40765.6). Total num frames: 2609995776. Throughput: 0: 9949.4. Samples: 402497724. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:23,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:25,651][626795] Updated weights for policy 0, policy_version 318612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:27,691][626795] Updated weights for policy 0, policy_version 318622 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:28,976][24592] Fps is (10 sec: 38502.2, 60 sec: 40140.8, 300 sec: 40737.8). Total num frames: 2610200576. Throughput: 0: 9956.7. Samples: 402528102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:28,979][24592] Avg episode reward: [(0, '4.726')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:29,743][626795] Updated weights for policy 0, policy_version 318632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:31,647][626795] Updated weights for policy 0, policy_version 318642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:33,659][626795] Updated weights for policy 0, policy_version 318652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:33,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40277.4, 300 sec: 40737.8). Total num frames: 2610413568. Throughput: 0: 9955.9. Samples: 402589422. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:33,976][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:35,656][626795] Updated weights for policy 0, policy_version 318662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:37,648][626795] Updated weights for policy 0, policy_version 318672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40141.0, 300 sec: 40710.1). Total num frames: 2610618368. Throughput: 0: 10122.2. Samples: 402651876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:38,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:39,637][626795] Updated weights for policy 0, policy_version 318682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:41,642][626795] Updated weights for policy 0, policy_version 318692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:43,638][626795] Updated weights for policy 0, policy_version 318702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:43,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40004.5, 300 sec: 40654.5). Total num frames: 2610814976. Throughput: 0: 10116.7. Samples: 402682182. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:43,978][24592] Avg episode reward: [(0, '4.362')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:45,625][626795] Updated weights for policy 0, policy_version 318712 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:47,632][626795] Updated weights for policy 0, policy_version 318722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:48,976][24592] Fps is (10 sec: 40139.9, 60 sec: 40550.2, 300 sec: 40626.7). Total num frames: 2611019776. Throughput: 0: 10105.6. Samples: 402743574. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:48,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:49,569][626795] Updated weights for policy 0, policy_version 318732 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:51,587][626795] Updated weights for policy 0, policy_version 318742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:54,090][24592] Fps is (10 sec: 38875.9, 60 sec: 40200.7, 300 sec: 40638.7). Total num frames: 2611208192. Throughput: 0: 9395.0. Samples: 402774354. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:54,092][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:54,189][626795] Updated weights for policy 0, policy_version 318752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:56,348][626795] Updated weights for policy 0, policy_version 318762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:58:58,290][626795] Updated weights for policy 0, policy_version 318772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:58,976][24592] Fps is (10 sec: 38502.7, 60 sec: 40140.9, 300 sec: 40599.0). Total num frames: 2611404800. Throughput: 0: 9960.2. Samples: 402828492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:58:58,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:00,340][626795] Updated weights for policy 0, policy_version 318782 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:02,296][626795] Updated weights for policy 0, policy_version 318792 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:03,975][24592] Fps is (10 sec: 40606.2, 60 sec: 40141.0, 300 sec: 40571.2). Total num frames: 2611609600. Throughput: 0: 9960.8. Samples: 402890178. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:03,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000318800_2611609600.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:04,128][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000317617_2601918464.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:04,422][626795] Updated weights for policy 0, policy_version 318802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:06,317][626795] Updated weights for policy 0, policy_version 318812 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:08,314][626795] Updated weights for policy 0, policy_version 318822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:08,976][24592] Fps is (10 sec: 40140.2, 60 sec: 40004.1, 300 sec: 40515.6). Total num frames: 2611806208. Throughput: 0: 10067.6. Samples: 402950772. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:08,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:10,421][626795] Updated weights for policy 0, policy_version 318832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:12,401][626795] Updated weights for policy 0, policy_version 318842 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:13,976][24592] Fps is (10 sec: 40140.4, 60 sec: 40004.2, 300 sec: 40515.7). Total num frames: 2612011008. Throughput: 0: 10071.0. Samples: 402981300. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:13,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:14,287][626795] Updated weights for policy 0, policy_version 318852 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:16,365][626795] Updated weights for policy 0, policy_version 318862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:18,370][626795] Updated weights for policy 0, policy_version 318872 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:18,975][24592] Fps is (10 sec: 40961.2, 60 sec: 40004.2, 300 sec: 40487.9). Total num frames: 2612215808. Throughput: 0: 10094.4. Samples: 403043670. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:18,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:20,374][626795] Updated weights for policy 0, policy_version 318882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:22,272][626795] Updated weights for policy 0, policy_version 318892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:23,976][24592] Fps is (10 sec: 41776.6, 60 sec: 40549.9, 300 sec: 40487.8). Total num frames: 2612428800. Throughput: 0: 10080.0. Samples: 403105482. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:23,978][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:24,325][626795] Updated weights for policy 0, policy_version 318902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:26,983][626795] Updated weights for policy 0, policy_version 318912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:28,901][626795] Updated weights for policy 0, policy_version 318922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:28,986][24592] Fps is (10 sec: 39281.5, 60 sec: 40133.9, 300 sec: 40486.5). Total num frames: 2612609024. Throughput: 0: 10028.1. Samples: 403133550. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:28,987][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:30,890][626795] Updated weights for policy 0, policy_version 318932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:32,821][626795] Updated weights for policy 0, policy_version 318942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:33,975][24592] Fps is (10 sec: 38505.0, 60 sec: 40004.2, 300 sec: 40460.1). Total num frames: 2612813824. Throughput: 0: 9967.9. Samples: 403192128. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:33,978][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:34,888][626795] Updated weights for policy 0, policy_version 318952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:36,817][626795] Updated weights for policy 0, policy_version 318962 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:38,779][626795] Updated weights for policy 0, policy_version 318972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:38,975][24592] Fps is (10 sec: 41822.4, 60 sec: 40140.9, 300 sec: 40460.3). Total num frames: 2613026816. Throughput: 0: 10686.6. Samples: 403254024. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:38,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:40,776][626795] Updated weights for policy 0, policy_version 318982 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:42,792][626795] Updated weights for policy 0, policy_version 318992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:43,975][24592] Fps is (10 sec: 41779.8, 60 sec: 40277.4, 300 sec: 40460.2). Total num frames: 2613231616. Throughput: 0: 10140.3. Samples: 403284804. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:43,976][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:44,828][626795] Updated weights for policy 0, policy_version 319002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:46,801][626795] Updated weights for policy 0, policy_version 319012 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:48,792][626795] Updated weights for policy 0, policy_version 319022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:48,975][24592] Fps is (10 sec: 40140.7, 60 sec: 40141.0, 300 sec: 40404.7). Total num frames: 2613428224. Throughput: 0: 10133.3. Samples: 403346178. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:48,976][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:50,666][626795] Updated weights for policy 0, policy_version 319032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:52,730][626795] Updated weights for policy 0, policy_version 319042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:53,976][24592] Fps is (10 sec: 40957.1, 60 sec: 40627.6, 300 sec: 40404.6). Total num frames: 2613641216. Throughput: 0: 10172.5. Samples: 403408536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:53,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:54,771][626795] Updated weights for policy 0, policy_version 319052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:56,684][626795] Updated weights for policy 0, policy_version 319062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 18:59:58,645][626795] Updated weights for policy 0, policy_version 319072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:58,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40687.1, 300 sec: 40460.2). Total num frames: 2613846016. Throughput: 0: 10185.0. Samples: 403439622. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 18:59:58,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:01,220][626795] Updated weights for policy 0, policy_version 319082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:03,239][626795] Updated weights for policy 0, policy_version 319092 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:03,975][24592] Fps is (10 sec: 38505.0, 60 sec: 40277.4, 300 sec: 40404.6). Total num frames: 2614026240. Throughput: 0: 10041.1. Samples: 403495518. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:03,977][24592] Avg episode reward: [(0, '4.895')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:05,261][626795] Updated weights for policy 0, policy_version 319102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:07,240][626795] Updated weights for policy 0, policy_version 319112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:08,975][24592] Fps is (10 sec: 39321.8, 60 sec: 40550.7, 300 sec: 40404.6). Total num frames: 2614239232. Throughput: 0: 10033.2. Samples: 403556970. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:08,978][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:09,281][626795] Updated weights for policy 0, policy_version 319122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:11,200][626795] Updated weights for policy 0, policy_version 319132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:13,236][626795] Updated weights for policy 0, policy_version 319142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:13,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40414.0, 300 sec: 40349.1). Total num frames: 2614435840. Throughput: 0: 10095.2. Samples: 403587732. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:13,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:15,110][626795] Updated weights for policy 0, policy_version 319152 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:17,141][626795] Updated weights for policy 0, policy_version 319162 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:18,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40550.4, 300 sec: 40349.1). Total num frames: 2614648832. Throughput: 0: 10168.1. Samples: 403649694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:18,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:19,127][626795] Updated weights for policy 0, policy_version 319172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:21,147][626795] Updated weights for policy 0, policy_version 319182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:23,002][626795] Updated weights for policy 0, policy_version 319192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40414.4, 300 sec: 40349.1). Total num frames: 2614853632. Throughput: 0: 10172.1. Samples: 403711770. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:23,977][24592] Avg episode reward: [(0, '4.706')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:25,091][626795] Updated weights for policy 0, policy_version 319202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:27,031][626795] Updated weights for policy 0, policy_version 319212 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40830.5, 300 sec: 40321.3). Total num frames: 2615058432. Throughput: 0: 10187.3. Samples: 403743234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:28,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:28,999][626795] Updated weights for policy 0, policy_version 319222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:30,979][626795] Updated weights for policy 0, policy_version 319232 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:33,570][626795] Updated weights for policy 0, policy_version 319242 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:33,976][24592] Fps is (10 sec: 38500.5, 60 sec: 40413.6, 300 sec: 40293.5). Total num frames: 2615238656. Throughput: 0: 10083.9. Samples: 403799958. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:33,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:35,640][626795] Updated weights for policy 0, policy_version 319252 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:37,628][626795] Updated weights for policy 0, policy_version 319262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:38,976][24592] Fps is (10 sec: 39320.8, 60 sec: 40413.7, 300 sec: 40321.3). Total num frames: 2615451648. Throughput: 0: 10044.2. Samples: 403860522. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:38,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:39,546][626795] Updated weights for policy 0, policy_version 319272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:41,517][626795] Updated weights for policy 0, policy_version 319282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:43,572][626795] Updated weights for policy 0, policy_version 319292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:43,975][24592] Fps is (10 sec: 40962.2, 60 sec: 40277.3, 300 sec: 40265.8). Total num frames: 2615648256. Throughput: 0: 10041.9. Samples: 403891506. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:43,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:45,601][626795] Updated weights for policy 0, policy_version 319302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:47,611][626795] Updated weights for policy 0, policy_version 319312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:48,975][24592] Fps is (10 sec: 40961.1, 60 sec: 40550.4, 300 sec: 40293.5). Total num frames: 2615861248. Throughput: 0: 10159.5. Samples: 403952694. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:48,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:49,628][626795] Updated weights for policy 0, policy_version 319322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:51,570][626795] Updated weights for policy 0, policy_version 319332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:53,579][626795] Updated weights for policy 0, policy_version 319342 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:53,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40414.3, 300 sec: 40265.8). Total num frames: 2616066048. Throughput: 0: 10155.6. Samples: 404013972. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:53,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:55,709][626795] Updated weights for policy 0, policy_version 319352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:57,544][626795] Updated weights for policy 0, policy_version 319362 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40413.9, 300 sec: 40238.0). Total num frames: 2616270848. Throughput: 0: 10152.4. Samples: 404044590. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:00:58,978][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:00:59,595][626795] Updated weights for policy 0, policy_version 319372 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:01,553][626795] Updated weights for policy 0, policy_version 319382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:03,474][626795] Updated weights for policy 0, policy_version 319392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:03,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40823.5, 300 sec: 40210.2). Total num frames: 2616475648. Throughput: 0: 10151.3. Samples: 404106504. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:03,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000319394_2616475648.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:04,141][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000318212_2606792704.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:06,197][626795] Updated weights for policy 0, policy_version 319402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:08,125][626795] Updated weights for policy 0, policy_version 319412 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:08,976][24592] Fps is (10 sec: 38500.6, 60 sec: 40277.0, 300 sec: 40210.2). Total num frames: 2616655872. Throughput: 0: 10005.0. Samples: 404161998. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:08,978][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:10,061][626795] Updated weights for policy 0, policy_version 319422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:12,052][626795] Updated weights for policy 0, policy_version 319432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:13,965][626795] Updated weights for policy 0, policy_version 319442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:13,975][24592] Fps is (10 sec: 39321.3, 60 sec: 40550.4, 300 sec: 40238.0). Total num frames: 2616868864. Throughput: 0: 10001.5. Samples: 404193300. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:13,978][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:15,951][626795] Updated weights for policy 0, policy_version 319452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:17,953][626795] Updated weights for policy 0, policy_version 319462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:18,976][24592] Fps is (10 sec: 41777.2, 60 sec: 40413.3, 300 sec: 40265.7). Total num frames: 2617073664. Throughput: 0: 10136.8. Samples: 404256120. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:18,978][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:19,899][626795] Updated weights for policy 0, policy_version 319472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:21,901][626795] Updated weights for policy 0, policy_version 319482 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:23,904][626795] Updated weights for policy 0, policy_version 319492 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40413.8, 300 sec: 40265.8). Total num frames: 2617278464. Throughput: 0: 10173.4. Samples: 404318322. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:23,976][24592] Avg episode reward: [(0, '4.993')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:25,863][626795] Updated weights for policy 0, policy_version 319502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:27,793][626795] Updated weights for policy 0, policy_version 319512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:28,976][24592] Fps is (10 sec: 40962.0, 60 sec: 40413.6, 300 sec: 40265.7). Total num frames: 2617483264. Throughput: 0: 10176.0. Samples: 404349432. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:28,977][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:29,797][626795] Updated weights for policy 0, policy_version 319522 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:31,824][626795] Updated weights for policy 0, policy_version 319532 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:33,758][626795] Updated weights for policy 0, policy_version 319542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.3, 300 sec: 40321.3). Total num frames: 2617696256. Throughput: 0: 10195.4. Samples: 404411490. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:33,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:35,743][626795] Updated weights for policy 0, policy_version 319552 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:37,691][626795] Updated weights for policy 0, policy_version 319562 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:38,976][24592] Fps is (10 sec: 38503.0, 60 sec: 40277.3, 300 sec: 40321.3). Total num frames: 2617868288. Throughput: 0: 10054.9. Samples: 404466444. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:38,977][24592] Avg episode reward: [(0, '4.376')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:40,423][626795] Updated weights for policy 0, policy_version 319572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:42,381][626795] Updated weights for policy 0, policy_version 319582 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:43,975][24592] Fps is (10 sec: 38502.1, 60 sec: 40550.3, 300 sec: 40349.1). Total num frames: 2618081280. Throughput: 0: 10045.4. Samples: 404496636. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:43,977][24592] Avg episode reward: [(0, '4.801')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:44,403][626795] Updated weights for policy 0, policy_version 319592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:46,381][626795] Updated weights for policy 0, policy_version 319602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:48,400][626795] Updated weights for policy 0, policy_version 319612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:48,975][24592] Fps is (10 sec: 41779.9, 60 sec: 40413.7, 300 sec: 40349.1). Total num frames: 2618286080. Throughput: 0: 10053.0. Samples: 404558892. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:48,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:50,327][626795] Updated weights for policy 0, policy_version 319622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:52,253][626795] Updated weights for policy 0, policy_version 319632 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:53,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40413.9, 300 sec: 40376.9). Total num frames: 2618490880. Throughput: 0: 10208.2. Samples: 404621364. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:53,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:54,379][626795] Updated weights for policy 0, policy_version 319642 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:56,230][626795] Updated weights for policy 0, policy_version 319652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:01:58,186][626795] Updated weights for policy 0, policy_version 319662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:58,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40413.8, 300 sec: 40349.1). Total num frames: 2618695680. Throughput: 0: 10191.9. Samples: 404651934. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:01:58,976][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:00,319][626795] Updated weights for policy 0, policy_version 319672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:02,070][626795] Updated weights for policy 0, policy_version 319682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40413.9, 300 sec: 40349.1). Total num frames: 2618900480. Throughput: 0: 10173.1. Samples: 404713902. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:03,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:04,215][626795] Updated weights for policy 0, policy_version 319692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:06,093][626795] Updated weights for policy 0, policy_version 319702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:08,069][626795] Updated weights for policy 0, policy_version 319712 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:08,976][24592] Fps is (10 sec: 41778.6, 60 sec: 40960.1, 300 sec: 40376.8). Total num frames: 2619113472. Throughput: 0: 10174.1. Samples: 404776158. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:08,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:10,198][626795] Updated weights for policy 0, policy_version 319722 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:12,844][626795] Updated weights for policy 0, policy_version 319732 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,875][626772] Signal inference workers to stop experience collection... (5250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,877][626772] Signal inference workers to resume experience collection... (5250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,890][626795] InferenceWorker_p0-w0: stopping experience collection (5250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,899][626795] InferenceWorker_p0-w0: resuming experience collection (5250 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,975][24592] Fps is (10 sec: 39321.3, 60 sec: 40413.8, 300 sec: 40376.8). Total num frames: 2619293696. Throughput: 0: 10005.7. Samples: 404799684. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:13,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:14,776][626795] Updated weights for policy 0, policy_version 319742 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:16,790][626795] Updated weights for policy 0, policy_version 319752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:18,763][626795] Updated weights for policy 0, policy_version 319762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:18,975][24592] Fps is (10 sec: 38503.2, 60 sec: 40414.5, 300 sec: 40376.9). Total num frames: 2619498496. Throughput: 0: 10017.2. Samples: 404862264. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:18,976][24592] Avg episode reward: [(0, '4.926')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:20,774][626795] Updated weights for policy 0, policy_version 319772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:22,641][626795] Updated weights for policy 0, policy_version 319782 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:23,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40413.9, 300 sec: 40376.8). Total num frames: 2619703296. Throughput: 0: 10164.7. Samples: 404923854. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:23,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:24,759][626795] Updated weights for policy 0, policy_version 319792 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:26,676][626795] Updated weights for policy 0, policy_version 319802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:28,782][626795] Updated weights for policy 0, policy_version 319812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40414.2, 300 sec: 40376.8). Total num frames: 2619908096. Throughput: 0: 10176.6. Samples: 404954580. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:28,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:30,645][626795] Updated weights for policy 0, policy_version 319822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:32,571][626795] Updated weights for policy 0, policy_version 319832 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:33,975][24592] Fps is (10 sec: 40960.3, 60 sec: 40277.4, 300 sec: 40349.1). Total num frames: 2620112896. Throughput: 0: 10181.2. Samples: 405017046. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:33,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:34,629][626795] Updated weights for policy 0, policy_version 319842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:36,674][626795] Updated weights for policy 0, policy_version 319852 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:38,585][626795] Updated weights for policy 0, policy_version 319862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:38,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40823.6, 300 sec: 40349.1). Total num frames: 2620317696. Throughput: 0: 10151.2. Samples: 405078168. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:38,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:40,648][626795] Updated weights for policy 0, policy_version 319872 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:42,526][626795] Updated weights for policy 0, policy_version 319882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:44,212][24592] Fps is (10 sec: 39213.0, 60 sec: 40391.2, 300 sec: 40400.0). Total num frames: 2620514304. Throughput: 0: 10108.0. Samples: 405109182. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:44,213][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:45,239][626795] Updated weights for policy 0, policy_version 319892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:47,200][626795] Updated weights for policy 0, policy_version 319902 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:48,975][24592] Fps is (10 sec: 39321.8, 60 sec: 40414.0, 300 sec: 40404.7). Total num frames: 2620710912. Throughput: 0: 10015.6. Samples: 405164604. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:48,977][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:49,163][626795] Updated weights for policy 0, policy_version 319912 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:51,140][626795] Updated weights for policy 0, policy_version 319922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:53,216][626795] Updated weights for policy 0, policy_version 319932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:53,975][24592] Fps is (10 sec: 40274.3, 60 sec: 40277.3, 300 sec: 40376.9). Total num frames: 2620907520. Throughput: 0: 10015.3. Samples: 405226842. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:53,976][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:55,136][626795] Updated weights for policy 0, policy_version 319942 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:57,165][626795] Updated weights for policy 0, policy_version 319952 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:58,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40413.9, 300 sec: 40404.7). Total num frames: 2621120512. Throughput: 0: 10170.4. Samples: 405257352. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:02:58,977][24592] Avg episode reward: [(0, '4.488')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:02:59,211][626795] Updated weights for policy 0, policy_version 319962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:01,157][626795] Updated weights for policy 0, policy_version 319972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:03,160][626795] Updated weights for policy 0, policy_version 319982 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40413.9, 300 sec: 40404.6). Total num frames: 2621325312. Throughput: 0: 10152.9. Samples: 405319146. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:03,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000319986_2621325312.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:04,114][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000318800_2611609600.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:05,304][626795] Updated weights for policy 0, policy_version 319992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:07,224][626795] Updated weights for policy 0, policy_version 320002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:08,975][24592] Fps is (10 sec: 40959.8, 60 sec: 40277.5, 300 sec: 40404.6). Total num frames: 2621530112. Throughput: 0: 10140.8. Samples: 405380190. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:08,976][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:09,152][626795] Updated weights for policy 0, policy_version 320012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:11,205][626795] Updated weights for policy 0, policy_version 320022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:13,192][626795] Updated weights for policy 0, policy_version 320032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:13,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40687.0, 300 sec: 40404.6). Total num frames: 2621734912. Throughput: 0: 10132.6. Samples: 405410550. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:13,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:15,225][626795] Updated weights for policy 0, policy_version 320042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:17,741][626795] Updated weights for policy 0, policy_version 320052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:18,975][24592] Fps is (10 sec: 38502.7, 60 sec: 40277.4, 300 sec: 40404.6). Total num frames: 2621915136. Throughput: 0: 9974.9. Samples: 405465918. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:18,976][24592] Avg episode reward: [(0, '4.895')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:19,830][626795] Updated weights for policy 0, policy_version 320062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:21,764][626795] Updated weights for policy 0, policy_version 320072 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:23,679][626795] Updated weights for policy 0, policy_version 320082 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:23,976][24592] Fps is (10 sec: 38500.9, 60 sec: 40277.1, 300 sec: 40404.6). Total num frames: 2622119936. Throughput: 0: 9985.8. Samples: 405527532. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:23,977][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:25,772][626795] Updated weights for policy 0, policy_version 320092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:27,738][626795] Updated weights for policy 0, policy_version 320102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 40277.3, 300 sec: 40376.8). Total num frames: 2622324736. Throughput: 0: 10024.4. Samples: 405557910. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:28,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:29,750][626795] Updated weights for policy 0, policy_version 320112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:31,726][626795] Updated weights for policy 0, policy_version 320122 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:33,752][626795] Updated weights for policy 0, policy_version 320132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:33,975][24592] Fps is (10 sec: 40961.9, 60 sec: 40277.3, 300 sec: 40376.9). Total num frames: 2622529536. Throughput: 0: 10125.5. Samples: 405620250. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:33,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:35,657][626795] Updated weights for policy 0, policy_version 320142 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:37,729][626795] Updated weights for policy 0, policy_version 320152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:38,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40277.4, 300 sec: 40404.6). Total num frames: 2622734336. Throughput: 0: 10109.1. Samples: 405681750. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:38,977][24592] Avg episode reward: [(0, '5.022')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:39,786][626795] Updated weights for policy 0, policy_version 320162 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:41,710][626795] Updated weights for policy 0, policy_version 320172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:43,601][626795] Updated weights for policy 0, policy_version 320182 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:43,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40573.8, 300 sec: 40404.6). Total num frames: 2622939136. Throughput: 0: 10112.9. Samples: 405712434. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:43,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:45,883][626795] Updated weights for policy 0, policy_version 320192 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:48,111][626795] Updated weights for policy 0, policy_version 320202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:48,975][24592] Fps is (10 sec: 38502.0, 60 sec: 40140.7, 300 sec: 40392.5). Total num frames: 2623119360. Throughput: 0: 10041.2. Samples: 405771000. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:48,976][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:50,980][626795] Updated weights for policy 0, policy_version 320212 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:52,856][626795] Updated weights for policy 0, policy_version 320222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:53,975][24592] Fps is (10 sec: 35225.9, 60 sec: 39731.2, 300 sec: 40293.6). Total num frames: 2623291392. Throughput: 0: 9813.9. Samples: 405821814. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:53,977][24592] Avg episode reward: [(0, '5.108')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:55,067][626795] Updated weights for policy 0, policy_version 320232 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:57,491][626795] Updated weights for policy 0, policy_version 320242 (0.0032)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:58,975][24592] Fps is (10 sec: 36864.5, 60 sec: 39458.1, 300 sec: 40265.8). Total num frames: 2623488000. Throughput: 0: 9777.5. Samples: 405850536. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:03:58,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:03:59,595][626795] Updated weights for policy 0, policy_version 320252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:01,514][626795] Updated weights for policy 0, policy_version 320262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:03,689][626795] Updated weights for policy 0, policy_version 320272 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:03,975][24592] Fps is (10 sec: 38502.4, 60 sec: 39185.1, 300 sec: 40238.0). Total num frames: 2623676416. Throughput: 0: 9802.7. Samples: 405907038. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:03,976][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:05,895][626795] Updated weights for policy 0, policy_version 320282 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:07,883][626795] Updated weights for policy 0, policy_version 320292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:08,975][24592] Fps is (10 sec: 38502.2, 60 sec: 39048.5, 300 sec: 40210.2). Total num frames: 2623873024. Throughput: 0: 9760.2. Samples: 405966738. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:08,978][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:09,835][626795] Updated weights for policy 0, policy_version 320302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:11,804][626795] Updated weights for policy 0, policy_version 320312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:13,839][626795] Updated weights for policy 0, policy_version 320322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:13,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39048.6, 300 sec: 40210.2). Total num frames: 2624077824. Throughput: 0: 9764.9. Samples: 405997332. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:13,977][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:15,920][626795] Updated weights for policy 0, policy_version 320332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:18,116][626795] Updated weights for policy 0, policy_version 320342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:18,975][24592] Fps is (10 sec: 40141.0, 60 sec: 39321.6, 300 sec: 40154.8). Total num frames: 2624274432. Throughput: 0: 9728.8. Samples: 406058046. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:18,976][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:20,058][626795] Updated weights for policy 0, policy_version 320352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:21,980][626795] Updated weights for policy 0, policy_version 320362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:23,976][24592] Fps is (10 sec: 37681.1, 60 sec: 38911.9, 300 sec: 40156.0). Total num frames: 2624454656. Throughput: 0: 9559.2. Samples: 406111920. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:23,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:24,728][626795] Updated weights for policy 0, policy_version 320372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:26,650][626795] Updated weights for policy 0, policy_version 320382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:28,507][626795] Updated weights for policy 0, policy_version 320392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:28,975][24592] Fps is (10 sec: 38502.3, 60 sec: 38912.0, 300 sec: 40154.7). Total num frames: 2624659456. Throughput: 0: 9573.0. Samples: 406143216. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:28,977][24592] Avg episode reward: [(0, '4.797')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:30,492][626795] Updated weights for policy 0, policy_version 320402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:32,504][626795] Updated weights for policy 0, policy_version 320412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:33,976][24592] Fps is (10 sec: 41780.4, 60 sec: 39048.3, 300 sec: 40154.6). Total num frames: 2624872448. Throughput: 0: 9678.0. Samples: 406206510. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:33,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:34,424][626795] Updated weights for policy 0, policy_version 320422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:36,491][626795] Updated weights for policy 0, policy_version 320432 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:38,249][626795] Updated weights for policy 0, policy_version 320442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:38,976][24592] Fps is (10 sec: 41777.9, 60 sec: 39048.3, 300 sec: 40154.6). Total num frames: 2625077248. Throughput: 0: 9934.3. Samples: 406268862. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:38,978][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:40,379][626795] Updated weights for policy 0, policy_version 320452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:42,297][626795] Updated weights for policy 0, policy_version 320462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:43,976][24592] Fps is (10 sec: 41777.9, 60 sec: 39184.7, 300 sec: 40210.1). Total num frames: 2625290240. Throughput: 0: 9983.1. Samples: 406299780. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:43,978][24592] Avg episode reward: [(0, '4.878')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:44,317][626795] Updated weights for policy 0, policy_version 320472 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:46,166][626795] Updated weights for policy 0, policy_version 320482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:48,022][626795] Updated weights for policy 0, policy_version 320492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:48,976][24592] Fps is (10 sec: 42598.1, 60 sec: 39731.0, 300 sec: 40210.3). Total num frames: 2625503232. Throughput: 0: 10140.8. Samples: 406363380. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:48,977][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:50,106][626795] Updated weights for policy 0, policy_version 320502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:52,096][626795] Updated weights for policy 0, policy_version 320512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:53,928][626795] Updated weights for policy 0, policy_version 320522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:53,975][24592] Fps is (10 sec: 42600.5, 60 sec: 40413.8, 300 sec: 40238.0). Total num frames: 2625716224. Throughput: 0: 10219.6. Samples: 406426620. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:53,978][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:56,752][626795] Updated weights for policy 0, policy_version 320532 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:04:58,807][626795] Updated weights for policy 0, policy_version 320542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:58,977][24592] Fps is (10 sec: 37678.0, 60 sec: 39866.5, 300 sec: 40182.2). Total num frames: 2625880064. Throughput: 0: 10059.3. Samples: 406450020. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:04:58,980][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:00,932][626795] Updated weights for policy 0, policy_version 320552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:03,072][626795] Updated weights for policy 0, policy_version 320562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:03,976][24592] Fps is (10 sec: 36042.2, 60 sec: 40003.7, 300 sec: 40126.8). Total num frames: 2626076672. Throughput: 0: 10002.5. Samples: 406508166. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:03,978][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:03,990][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000320567_2626084864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:04,125][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000319394_2616475648.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:05,028][626795] Updated weights for policy 0, policy_version 320572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:07,028][626795] Updated weights for policy 0, policy_version 320582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:08,975][24592] Fps is (10 sec: 40147.5, 60 sec: 40140.7, 300 sec: 40154.7). Total num frames: 2626281472. Throughput: 0: 10156.5. Samples: 406568958. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:08,978][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:09,173][626795] Updated weights for policy 0, policy_version 320592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:11,090][626795] Updated weights for policy 0, policy_version 320602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:13,092][626795] Updated weights for policy 0, policy_version 320612 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:13,976][24592] Fps is (10 sec: 40961.6, 60 sec: 40140.5, 300 sec: 40126.9). Total num frames: 2626486272. Throughput: 0: 10129.9. Samples: 406599066. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:13,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:15,169][626795] Updated weights for policy 0, policy_version 320622 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:17,109][626795] Updated weights for policy 0, policy_version 320632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:18,977][24592] Fps is (10 sec: 40952.9, 60 sec: 40276.1, 300 sec: 40126.7). Total num frames: 2626691072. Throughput: 0: 10084.6. Samples: 406660332. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:18,979][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:19,223][626795] Updated weights for policy 0, policy_version 320642 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:21,173][626795] Updated weights for policy 0, policy_version 320652 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:23,169][626795] Updated weights for policy 0, policy_version 320662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:23,976][24592] Fps is (10 sec: 40140.7, 60 sec: 40550.5, 300 sec: 40099.1). Total num frames: 2626887680. Throughput: 0: 10044.5. Samples: 406720866. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:23,977][24592] Avg episode reward: [(0, '4.982')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:25,216][626795] Updated weights for policy 0, policy_version 320672 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:27,182][626795] Updated weights for policy 0, policy_version 320682 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:28,975][24592] Fps is (10 sec: 37690.1, 60 sec: 40140.8, 300 sec: 40099.2). Total num frames: 2627067904. Throughput: 0: 10040.1. Samples: 406751580. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:28,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:29,909][626795] Updated weights for policy 0, policy_version 320692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:31,837][626795] Updated weights for policy 0, policy_version 320702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:33,874][626795] Updated weights for policy 0, policy_version 320712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:33,976][24592] Fps is (10 sec: 38503.7, 60 sec: 40004.4, 300 sec: 40071.4). Total num frames: 2627272704. Throughput: 0: 9843.5. Samples: 406806336. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:33,977][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:35,825][626795] Updated weights for policy 0, policy_version 320722 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:37,895][626795] Updated weights for policy 0, policy_version 320732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:38,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40141.0, 300 sec: 40126.9). Total num frames: 2627485696. Throughput: 0: 9819.9. Samples: 406868514. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:38,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:39,807][626795] Updated weights for policy 0, policy_version 320742 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:41,763][626795] Updated weights for policy 0, policy_version 320752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:43,778][626795] Updated weights for policy 0, policy_version 320762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:43,976][24592] Fps is (10 sec: 41777.8, 60 sec: 40004.4, 300 sec: 40099.1). Total num frames: 2627690496. Throughput: 0: 9980.0. Samples: 406899108. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:43,977][24592] Avg episode reward: [(0, '4.878')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:45,734][626795] Updated weights for policy 0, policy_version 320772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:47,906][626795] Updated weights for policy 0, policy_version 320782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:48,976][24592] Fps is (10 sec: 40137.3, 60 sec: 39730.9, 300 sec: 40071.3). Total num frames: 2627887104. Throughput: 0: 10047.9. Samples: 406960320. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:48,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:50,219][626795] Updated weights for policy 0, policy_version 320792 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:52,221][626795] Updated weights for policy 0, policy_version 320802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:53,975][24592] Fps is (10 sec: 39323.3, 60 sec: 39458.2, 300 sec: 40043.6). Total num frames: 2628083712. Throughput: 0: 9998.0. Samples: 407018868. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:53,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:54,115][626795] Updated weights for policy 0, policy_version 320812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:56,112][626795] Updated weights for policy 0, policy_version 320822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:05:58,007][626795] Updated weights for policy 0, policy_version 320832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:58,975][24592] Fps is (10 sec: 40143.9, 60 sec: 40141.9, 300 sec: 40043.6). Total num frames: 2628288512. Throughput: 0: 10011.8. Samples: 407049594. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:05:58,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:00,261][626795] Updated weights for policy 0, policy_version 320842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:03,293][626795] Updated weights for policy 0, policy_version 320852 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:03,976][24592] Fps is (10 sec: 36041.9, 60 sec: 39458.1, 300 sec: 39960.2). Total num frames: 2628444160. Throughput: 0: 9766.1. Samples: 407099796. Policy #0 lag: (min: 0.0, avg: 2.7, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:03,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:05,593][626795] Updated weights for policy 0, policy_version 320862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:07,855][626795] Updated weights for policy 0, policy_version 320872 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:08,975][24592] Fps is (10 sec: 33587.5, 60 sec: 39048.6, 300 sec: 39849.2). Total num frames: 2628624384. Throughput: 0: 9639.3. Samples: 407154630. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:08,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:09,995][626795] Updated weights for policy 0, policy_version 320882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:12,275][626795] Updated weights for policy 0, policy_version 320892 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:13,975][24592] Fps is (10 sec: 36866.9, 60 sec: 38775.7, 300 sec: 39793.8). Total num frames: 2628812800. Throughput: 0: 9562.9. Samples: 407181912. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:13,977][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:14,558][626795] Updated weights for policy 0, policy_version 320902 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:16,786][626795] Updated weights for policy 0, policy_version 320912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:18,840][626795] Updated weights for policy 0, policy_version 320922 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:18,975][24592] Fps is (10 sec: 36863.9, 60 sec: 38367.0, 300 sec: 39710.4). Total num frames: 2628993024. Throughput: 0: 9579.7. Samples: 407237424. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:18,976][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:21,122][626795] Updated weights for policy 0, policy_version 320932 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:23,262][626795] Updated weights for policy 0, policy_version 320942 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:23,975][24592] Fps is (10 sec: 36864.0, 60 sec: 38229.6, 300 sec: 39654.9). Total num frames: 2629181440. Throughput: 0: 9459.7. Samples: 407294202. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:23,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:25,152][626795] Updated weights for policy 0, policy_version 320952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:27,101][626795] Updated weights for policy 0, policy_version 320962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:28,975][24592] Fps is (10 sec: 40140.6, 60 sec: 38775.4, 300 sec: 39654.8). Total num frames: 2629394432. Throughput: 0: 9472.7. Samples: 407325378. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:28,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:29,266][626795] Updated weights for policy 0, policy_version 320972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:31,205][626795] Updated weights for policy 0, policy_version 320982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:33,251][626795] Updated weights for policy 0, policy_version 320992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:34,405][24592] Fps is (10 sec: 38486.5, 60 sec: 38228.6, 300 sec: 39652.6). Total num frames: 2629582848. Throughput: 0: 9386.5. Samples: 407386740. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:34,406][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:35,773][626795] Updated weights for policy 0, policy_version 321002 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:37,931][626795] Updated weights for policy 0, policy_version 321012 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:38,975][24592] Fps is (10 sec: 37683.4, 60 sec: 38092.8, 300 sec: 39627.1). Total num frames: 2629771264. Throughput: 0: 9396.8. Samples: 407441724. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:38,976][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:39,869][626795] Updated weights for policy 0, policy_version 321022 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:41,797][626795] Updated weights for policy 0, policy_version 321032 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:43,976][24592] Fps is (10 sec: 40231.4, 60 sec: 37956.5, 300 sec: 39599.3). Total num frames: 2629967872. Throughput: 0: 9398.5. Samples: 407472528. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:43,978][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:44,009][626795] Updated weights for policy 0, policy_version 321042 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:46,200][626795] Updated weights for policy 0, policy_version 321052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:48,192][626795] Updated weights for policy 0, policy_version 321062 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:48,975][24592] Fps is (10 sec: 40140.9, 60 sec: 38093.3, 300 sec: 39599.3). Total num frames: 2630172672. Throughput: 0: 9564.4. Samples: 407530188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:48,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:50,245][626795] Updated weights for policy 0, policy_version 321072 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:52,164][626795] Updated weights for policy 0, policy_version 321082 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:53,975][24592] Fps is (10 sec: 40141.1, 60 sec: 38092.8, 300 sec: 39571.5). Total num frames: 2630369280. Throughput: 0: 9717.7. Samples: 407591928. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:53,977][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:54,244][626795] Updated weights for policy 0, policy_version 321092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:56,226][626795] Updated weights for policy 0, policy_version 321102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:06:58,302][626795] Updated weights for policy 0, policy_version 321112 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:58,976][24592] Fps is (10 sec: 40140.0, 60 sec: 38092.7, 300 sec: 39571.5). Total num frames: 2630574080. Throughput: 0: 9775.4. Samples: 407621808. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:06:58,979][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:00,398][626795] Updated weights for policy 0, policy_version 321122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:02,372][626795] Updated weights for policy 0, policy_version 321132 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:03,976][24592] Fps is (10 sec: 40140.3, 60 sec: 38775.9, 300 sec: 39516.0). Total num frames: 2630770688. Throughput: 0: 9887.7. Samples: 407682372. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:03,978][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000321140_2630778880.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:04,091][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000319986_2621325312.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:04,459][626795] Updated weights for policy 0, policy_version 321142 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:06,420][626795] Updated weights for policy 0, policy_version 321152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:08,976][24592] Fps is (10 sec: 37680.3, 60 sec: 38774.8, 300 sec: 39515.9). Total num frames: 2630950912. Throughput: 0: 9812.9. Samples: 407735790. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:08,978][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:09,071][626795] Updated weights for policy 0, policy_version 321162 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:11,239][626795] Updated weights for policy 0, policy_version 321172 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:13,319][626795] Updated weights for policy 0, policy_version 321182 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:13,976][24592] Fps is (10 sec: 36863.5, 60 sec: 38775.3, 300 sec: 39460.4). Total num frames: 2631139328. Throughput: 0: 9769.3. Samples: 407764998. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:13,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:15,480][626795] Updated weights for policy 0, policy_version 321192 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:17,693][626795] Updated weights for policy 0, policy_version 321202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:18,975][24592] Fps is (10 sec: 38506.2, 60 sec: 39048.6, 300 sec: 39432.7). Total num frames: 2631335936. Throughput: 0: 9759.1. Samples: 407821704. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:18,976][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:19,835][626795] Updated weights for policy 0, policy_version 321212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:21,654][626795] Updated weights for policy 0, policy_version 321222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:23,932][626795] Updated weights for policy 0, policy_version 321232 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:23,975][24592] Fps is (10 sec: 39322.7, 60 sec: 39185.1, 300 sec: 39404.9). Total num frames: 2631532544. Throughput: 0: 9752.7. Samples: 407880594. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:23,977][24592] Avg episode reward: [(0, '4.933')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:26,323][626795] Updated weights for policy 0, policy_version 321242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:28,531][626795] Updated weights for policy 0, policy_version 321252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:28,975][24592] Fps is (10 sec: 36864.2, 60 sec: 38502.5, 300 sec: 39293.8). Total num frames: 2631704576. Throughput: 0: 9652.3. Samples: 407906880. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:28,976][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:30,704][626795] Updated weights for policy 0, policy_version 321262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:32,672][626795] Updated weights for policy 0, policy_version 321272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:33,975][24592] Fps is (10 sec: 37683.1, 60 sec: 39055.3, 300 sec: 39293.8). Total num frames: 2631909376. Throughput: 0: 9670.7. Samples: 407965368. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:33,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:34,868][626795] Updated weights for policy 0, policy_version 321282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:37,089][626795] Updated weights for policy 0, policy_version 321292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:38,975][24592] Fps is (10 sec: 38502.2, 60 sec: 38639.0, 300 sec: 39269.8). Total num frames: 2632089600. Throughput: 0: 9527.6. Samples: 408020670. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:38,976][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:39,556][626795] Updated weights for policy 0, policy_version 321302 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:42,337][626795] Updated weights for policy 0, policy_version 321312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:43,975][24592] Fps is (10 sec: 34406.1, 60 sec: 38092.8, 300 sec: 39127.2). Total num frames: 2632253440. Throughput: 0: 9317.5. Samples: 408041094. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:43,976][24592] Avg episode reward: [(0, '4.250')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:44,358][626795] Updated weights for policy 0, policy_version 321322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:46,308][626795] Updated weights for policy 0, policy_version 321332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:48,313][626795] Updated weights for policy 0, policy_version 321342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:48,975][24592] Fps is (10 sec: 36044.9, 60 sec: 37956.3, 300 sec: 39127.2). Total num frames: 2632450048. Throughput: 0: 9321.4. Samples: 408101832. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:48,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:50,510][626795] Updated weights for policy 0, policy_version 321352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:52,500][626795] Updated weights for policy 0, policy_version 321362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:53,976][24592] Fps is (10 sec: 40140.0, 60 sec: 38092.6, 300 sec: 39099.4). Total num frames: 2632654848. Throughput: 0: 9494.0. Samples: 408163014. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:53,977][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:54,430][626795] Updated weights for policy 0, policy_version 321372 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:56,531][626795] Updated weights for policy 0, policy_version 321382 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:07:58,364][626795] Updated weights for policy 0, policy_version 321392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:58,984][24592] Fps is (10 sec: 40923.9, 60 sec: 38087.4, 300 sec: 39098.3). Total num frames: 2632859648. Throughput: 0: 9516.9. Samples: 408193338. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:07:58,985][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:00,411][626795] Updated weights for policy 0, policy_version 321402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:02,537][626795] Updated weights for policy 0, policy_version 321412 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:03,975][24592] Fps is (10 sec: 40961.3, 60 sec: 38229.4, 300 sec: 39099.5). Total num frames: 2633064448. Throughput: 0: 9624.5. Samples: 408254808. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:03,977][24592] Avg episode reward: [(0, '4.355')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:04,472][626795] Updated weights for policy 0, policy_version 321422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:06,445][626795] Updated weights for policy 0, policy_version 321432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:08,589][626795] Updated weights for policy 0, policy_version 321442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:08,975][24592] Fps is (10 sec: 40995.8, 60 sec: 38639.5, 300 sec: 39099.4). Total num frames: 2633269248. Throughput: 0: 9672.4. Samples: 408315852. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:08,978][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:10,460][626795] Updated weights for policy 0, policy_version 321452 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:12,388][626795] Updated weights for policy 0, policy_version 321462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:13,975][24592] Fps is (10 sec: 38502.4, 60 sec: 38502.6, 300 sec: 39099.4). Total num frames: 2633449472. Throughput: 0: 9761.3. Samples: 408346140. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:13,976][24592] Avg episode reward: [(0, '4.441')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:15,216][626795] Updated weights for policy 0, policy_version 321472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:17,225][626795] Updated weights for policy 0, policy_version 321482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:18,975][24592] Fps is (10 sec: 37683.6, 60 sec: 38502.4, 300 sec: 39071.7). Total num frames: 2633646080. Throughput: 0: 9652.0. Samples: 408399708. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:18,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:19,374][626795] Updated weights for policy 0, policy_version 321492 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:21,278][626795] Updated weights for policy 0, policy_version 321502 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:23,226][626795] Updated weights for policy 0, policy_version 321512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:23,975][24592] Fps is (10 sec: 40140.5, 60 sec: 38638.9, 300 sec: 39071.7). Total num frames: 2633850880. Throughput: 0: 9779.1. Samples: 408460728. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:23,976][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:25,315][626795] Updated weights for policy 0, policy_version 321522 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:27,418][626795] Updated weights for policy 0, policy_version 321532 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:28,975][24592] Fps is (10 sec: 40959.7, 60 sec: 39185.0, 300 sec: 39071.7). Total num frames: 2634055680. Throughput: 0: 9994.6. Samples: 408490848. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:28,978][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:29,461][626795] Updated weights for policy 0, policy_version 321542 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:29,711][626772] Signal inference workers to stop experience collection... (5300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:29,714][626772] Signal inference workers to resume experience collection... (5300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:29,735][626795] InferenceWorker_p0-w0: stopping experience collection (5300 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:29,740][626795] InferenceWorker_p0-w0: resuming experience collection (5300 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:31,292][626795] Updated weights for policy 0, policy_version 321552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:33,419][626795] Updated weights for policy 0, policy_version 321562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:33,975][24592] Fps is (10 sec: 40140.5, 60 sec: 39048.5, 300 sec: 39043.9). Total num frames: 2634252288. Throughput: 0: 9994.5. Samples: 408551586. Policy #0 lag: (min: 1.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:33,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:35,432][626795] Updated weights for policy 0, policy_version 321572 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:37,450][626795] Updated weights for policy 0, policy_version 321582 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:38,975][24592] Fps is (10 sec: 40140.9, 60 sec: 39458.1, 300 sec: 39043.9). Total num frames: 2634457088. Throughput: 0: 9993.7. Samples: 408612726. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:38,976][24592] Avg episode reward: [(0, '5.102')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:39,541][626795] Updated weights for policy 0, policy_version 321592 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:41,609][626795] Updated weights for policy 0, policy_version 321602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:43,468][626795] Updated weights for policy 0, policy_version 321612 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:43,977][24592] Fps is (10 sec: 40953.7, 60 sec: 40139.8, 300 sec: 39127.0). Total num frames: 2634661888. Throughput: 0: 9994.3. Samples: 408643008. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:43,978][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:46,259][626795] Updated weights for policy 0, policy_version 321622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:48,290][626795] Updated weights for policy 0, policy_version 321632 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:48,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39731.2, 300 sec: 39127.2). Total num frames: 2634833920. Throughput: 0: 9821.1. Samples: 408696756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:48,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:50,321][626795] Updated weights for policy 0, policy_version 321642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:52,269][626795] Updated weights for policy 0, policy_version 321652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:53,975][24592] Fps is (10 sec: 37689.6, 60 sec: 39731.4, 300 sec: 39155.0). Total num frames: 2635038720. Throughput: 0: 9826.8. Samples: 408758058. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:53,977][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:54,264][626795] Updated weights for policy 0, policy_version 321662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:56,429][626795] Updated weights for policy 0, policy_version 321672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:08:58,320][626795] Updated weights for policy 0, policy_version 321682 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39737.0, 300 sec: 39210.5). Total num frames: 2635243520. Throughput: 0: 9819.1. Samples: 408787998. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:08:58,976][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:00,400][626795] Updated weights for policy 0, policy_version 321692 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:02,390][626795] Updated weights for policy 0, policy_version 321702 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:03,975][24592] Fps is (10 sec: 40959.7, 60 sec: 39731.2, 300 sec: 39238.3). Total num frames: 2635448320. Throughput: 0: 9988.6. Samples: 408849198. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:03,977][24592] Avg episode reward: [(0, '4.439')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000321710_2635448320.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:04,121][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000320567_2626084864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:04,433][626795] Updated weights for policy 0, policy_version 321712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:06,418][626795] Updated weights for policy 0, policy_version 321722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:08,597][626795] Updated weights for policy 0, policy_version 321732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:08,976][24592] Fps is (10 sec: 40139.8, 60 sec: 39594.5, 300 sec: 39210.5). Total num frames: 2635644928. Throughput: 0: 9969.4. Samples: 408909354. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:08,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:10,566][626795] Updated weights for policy 0, policy_version 321742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:12,497][626795] Updated weights for policy 0, policy_version 321752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:13,975][24592] Fps is (10 sec: 40140.6, 60 sec: 40004.2, 300 sec: 39238.3). Total num frames: 2635849728. Throughput: 0: 9981.9. Samples: 408940032. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:13,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:14,545][626795] Updated weights for policy 0, policy_version 321762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:16,520][626795] Updated weights for policy 0, policy_version 321772 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:18,976][24592] Fps is (10 sec: 37682.6, 60 sec: 39594.4, 300 sec: 39210.5). Total num frames: 2636021760. Throughput: 0: 10002.2. Samples: 409001688. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:18,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:19,168][626795] Updated weights for policy 0, policy_version 321782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:21,286][626795] Updated weights for policy 0, policy_version 321792 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:23,284][626795] Updated weights for policy 0, policy_version 321802 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:23,975][24592] Fps is (10 sec: 37683.5, 60 sec: 39594.7, 300 sec: 39210.5). Total num frames: 2636226560. Throughput: 0: 9830.0. Samples: 409055076. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:23,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:25,175][626795] Updated weights for policy 0, policy_version 321812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:27,303][626795] Updated weights for policy 0, policy_version 321822 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:28,975][24592] Fps is (10 sec: 40961.8, 60 sec: 39594.7, 300 sec: 39182.8). Total num frames: 2636431360. Throughput: 0: 9837.3. Samples: 409085670. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:28,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:29,344][626795] Updated weights for policy 0, policy_version 321832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:31,202][626795] Updated weights for policy 0, policy_version 321842 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:33,127][626795] Updated weights for policy 0, policy_version 321852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:33,976][24592] Fps is (10 sec: 40959.2, 60 sec: 39731.1, 300 sec: 39182.8). Total num frames: 2636636160. Throughput: 0: 10024.4. Samples: 409147854. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:33,976][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:35,264][626795] Updated weights for policy 0, policy_version 321862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:37,254][626795] Updated weights for policy 0, policy_version 321872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:38,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39731.2, 300 sec: 39155.1). Total num frames: 2636840960. Throughput: 0: 10026.9. Samples: 409209270. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:38,976][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:39,292][626795] Updated weights for policy 0, policy_version 321882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:41,258][626795] Updated weights for policy 0, policy_version 321892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:43,305][626795] Updated weights for policy 0, policy_version 321902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:43,975][24592] Fps is (10 sec: 40960.8, 60 sec: 39732.3, 300 sec: 39127.3). Total num frames: 2637045760. Throughput: 0: 10024.0. Samples: 409239078. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:43,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:45,272][626795] Updated weights for policy 0, policy_version 321912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:47,301][626795] Updated weights for policy 0, policy_version 321922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:48,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40277.3, 300 sec: 39099.5). Total num frames: 2637250560. Throughput: 0: 10018.7. Samples: 409300038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:48,977][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:49,378][626795] Updated weights for policy 0, policy_version 321932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:51,953][626795] Updated weights for policy 0, policy_version 321942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:53,977][24592] Fps is (10 sec: 37676.9, 60 sec: 39730.1, 300 sec: 39127.2). Total num frames: 2637422592. Throughput: 0: 9899.4. Samples: 409354842. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:53,978][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:53,998][626795] Updated weights for policy 0, policy_version 321952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:55,988][626795] Updated weights for policy 0, policy_version 321962 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:09:58,086][626795] Updated weights for policy 0, policy_version 321972 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:58,976][24592] Fps is (10 sec: 37681.4, 60 sec: 39730.9, 300 sec: 39155.0). Total num frames: 2637627392. Throughput: 0: 9881.8. Samples: 409384716. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:09:58,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:00,094][626795] Updated weights for policy 0, policy_version 321982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:02,145][626795] Updated weights for policy 0, policy_version 321992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:03,976][24592] Fps is (10 sec: 40965.8, 60 sec: 39731.1, 300 sec: 39155.0). Total num frames: 2637832192. Throughput: 0: 9880.0. Samples: 409446288. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:03,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:04,149][626795] Updated weights for policy 0, policy_version 322002 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:06,181][626795] Updated weights for policy 0, policy_version 322012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:08,087][626795] Updated weights for policy 0, policy_version 322022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:08,975][24592] Fps is (10 sec: 40961.8, 60 sec: 39867.9, 300 sec: 39155.0). Total num frames: 2638036992. Throughput: 0: 10043.6. Samples: 409507038. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:08,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:10,131][626795] Updated weights for policy 0, policy_version 322032 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:12,105][626795] Updated weights for policy 0, policy_version 322042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:13,976][24592] Fps is (10 sec: 40959.9, 60 sec: 39867.6, 300 sec: 39155.2). Total num frames: 2638241792. Throughput: 0: 10051.7. Samples: 409537998. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:13,978][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:14,020][626795] Updated weights for policy 0, policy_version 322052 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:16,033][626795] Updated weights for policy 0, policy_version 322062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:18,022][626795] Updated weights for policy 0, policy_version 322072 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:18,976][24592] Fps is (10 sec: 40957.5, 60 sec: 40413.7, 300 sec: 39182.7). Total num frames: 2638446592. Throughput: 0: 10053.6. Samples: 409600272. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:18,979][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:20,056][626795] Updated weights for policy 0, policy_version 322082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:22,142][626795] Updated weights for policy 0, policy_version 322092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:24,603][24592] Fps is (10 sec: 38543.3, 60 sec: 39995.7, 300 sec: 39182.7). Total num frames: 2638651392. Throughput: 0: 9913.9. Samples: 409661616. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:24,605][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:24,709][626795] Updated weights for policy 0, policy_version 322102 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:27,007][626795] Updated weights for policy 0, policy_version 322112 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:28,807][626795] Updated weights for policy 0, policy_version 322122 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:28,975][24592] Fps is (10 sec: 37685.4, 60 sec: 39867.7, 300 sec: 39155.0). Total num frames: 2638823424. Throughput: 0: 9893.6. Samples: 409684290. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:28,979][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:30,859][626795] Updated weights for policy 0, policy_version 322132 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:32,866][626795] Updated weights for policy 0, policy_version 322142 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:33,975][24592] Fps is (10 sec: 40205.3, 60 sec: 39867.9, 300 sec: 39127.2). Total num frames: 2639028224. Throughput: 0: 9890.3. Samples: 409745100. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:33,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:34,994][626795] Updated weights for policy 0, policy_version 322152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:36,867][626795] Updated weights for policy 0, policy_version 322162 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:38,920][626795] Updated weights for policy 0, policy_version 322172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:38,975][24592] Fps is (10 sec: 40960.3, 60 sec: 39867.7, 300 sec: 39127.3). Total num frames: 2639233024. Throughput: 0: 10035.3. Samples: 409806414. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:38,977][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:40,978][626795] Updated weights for policy 0, policy_version 322182 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:42,814][626795] Updated weights for policy 0, policy_version 322192 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:43,976][24592] Fps is (10 sec: 40957.5, 60 sec: 39867.3, 300 sec: 39155.0). Total num frames: 2639437824. Throughput: 0: 10065.8. Samples: 409837680. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:43,978][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:44,876][626795] Updated weights for policy 0, policy_version 322202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:46,694][626795] Updated weights for policy 0, policy_version 322212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:48,645][626795] Updated weights for policy 0, policy_version 322222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:48,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40004.3, 300 sec: 39210.5). Total num frames: 2639650816. Throughput: 0: 10094.7. Samples: 409900548. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:48,976][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:50,772][626795] Updated weights for policy 0, policy_version 322232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:52,876][626795] Updated weights for policy 0, policy_version 322242 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:53,976][24592] Fps is (10 sec: 41780.5, 60 sec: 40551.3, 300 sec: 39210.5). Total num frames: 2639855616. Throughput: 0: 10084.1. Samples: 409960824. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:53,978][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:54,772][626795] Updated weights for policy 0, policy_version 322252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:56,797][626795] Updated weights for policy 0, policy_version 322262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:58,975][24592] Fps is (10 sec: 36863.9, 60 sec: 39868.0, 300 sec: 39238.4). Total num frames: 2640019456. Throughput: 0: 10072.6. Samples: 409991262. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:10:58,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:10:59,719][626795] Updated weights for policy 0, policy_version 322272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:01,724][626795] Updated weights for policy 0, policy_version 322282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:03,671][626795] Updated weights for policy 0, policy_version 322292 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:03,975][24592] Fps is (10 sec: 36864.9, 60 sec: 39867.9, 300 sec: 39321.6). Total num frames: 2640224256. Throughput: 0: 9861.2. Samples: 410044020. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:03,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000322293_2640224256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:04,117][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000321140_2630778880.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:05,772][626795] Updated weights for policy 0, policy_version 322302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:07,680][626795] Updated weights for policy 0, policy_version 322312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:08,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39867.7, 300 sec: 39377.1). Total num frames: 2640429056. Throughput: 0: 9987.3. Samples: 410104782. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:08,978][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:09,809][626795] Updated weights for policy 0, policy_version 322322 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:11,842][626795] Updated weights for policy 0, policy_version 322332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:13,688][626795] Updated weights for policy 0, policy_version 322342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:13,975][24592] Fps is (10 sec: 40960.3, 60 sec: 39868.0, 300 sec: 39460.5). Total num frames: 2640633856. Throughput: 0: 10027.0. Samples: 410135502. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:13,976][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:15,970][626795] Updated weights for policy 0, policy_version 322352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:18,086][626795] Updated weights for policy 0, policy_version 322362 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:18,976][24592] Fps is (10 sec: 39320.6, 60 sec: 39594.9, 300 sec: 39460.4). Total num frames: 2640822272. Throughput: 0: 9975.9. Samples: 410194020. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:18,979][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:20,007][626795] Updated weights for policy 0, policy_version 322372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:21,999][626795] Updated weights for policy 0, policy_version 322382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:23,958][626795] Updated weights for policy 0, policy_version 322392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:23,975][24592] Fps is (10 sec: 40140.5, 60 sec: 40151.0, 300 sec: 39460.5). Total num frames: 2641035264. Throughput: 0: 9995.9. Samples: 410256228. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:23,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:26,005][626795] Updated weights for policy 0, policy_version 322402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:27,946][626795] Updated weights for policy 0, policy_version 322412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:28,975][24592] Fps is (10 sec: 40961.2, 60 sec: 40140.8, 300 sec: 39545.8). Total num frames: 2641231872. Throughput: 0: 9971.2. Samples: 410286378. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:28,978][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:30,799][626795] Updated weights for policy 0, policy_version 322422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:32,769][626795] Updated weights for policy 0, policy_version 322432 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:33,975][24592] Fps is (10 sec: 36863.9, 60 sec: 39594.6, 300 sec: 39432.7). Total num frames: 2641403904. Throughput: 0: 9761.1. Samples: 410339796. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:33,976][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:34,938][626795] Updated weights for policy 0, policy_version 322442 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:36,932][626795] Updated weights for policy 0, policy_version 322452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:38,896][626795] Updated weights for policy 0, policy_version 322462 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:38,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39594.7, 300 sec: 39460.5). Total num frames: 2641608704. Throughput: 0: 9770.7. Samples: 410400504. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:38,977][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:41,021][626795] Updated weights for policy 0, policy_version 322472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:43,226][626795] Updated weights for policy 0, policy_version 322482 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:43,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39322.0, 300 sec: 39404.9). Total num frames: 2641797120. Throughput: 0: 9728.5. Samples: 410429046. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:43,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:45,520][626795] Updated weights for policy 0, policy_version 322492 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:47,477][626795] Updated weights for policy 0, policy_version 322502 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:48,975][24592] Fps is (10 sec: 37683.2, 60 sec: 38912.0, 300 sec: 39377.1). Total num frames: 2641985536. Throughput: 0: 9826.9. Samples: 410486232. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:48,976][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:49,611][626795] Updated weights for policy 0, policy_version 322512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:51,622][626795] Updated weights for policy 0, policy_version 322522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:53,468][626795] Updated weights for policy 0, policy_version 322532 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:53,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39048.7, 300 sec: 39404.9). Total num frames: 2642198528. Throughput: 0: 9842.3. Samples: 410547684. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:53,978][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:55,565][626795] Updated weights for policy 0, policy_version 322542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:57,581][626795] Updated weights for policy 0, policy_version 322552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39594.7, 300 sec: 39404.9). Total num frames: 2642395136. Throughput: 0: 9819.9. Samples: 410577396. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:11:58,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:11:59,583][626795] Updated weights for policy 0, policy_version 322562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:01,715][626795] Updated weights for policy 0, policy_version 322572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:03,975][24592] Fps is (10 sec: 36863.9, 60 sec: 39048.5, 300 sec: 39377.3). Total num frames: 2642567168. Throughput: 0: 9863.5. Samples: 410637876. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:03,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:04,479][626795] Updated weights for policy 0, policy_version 322582 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:06,546][626795] Updated weights for policy 0, policy_version 322592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:08,509][626795] Updated weights for policy 0, policy_version 322602 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:08,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39048.6, 300 sec: 39432.7). Total num frames: 2642771968. Throughput: 0: 9662.0. Samples: 410691018. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:08,976][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:10,556][626795] Updated weights for policy 0, policy_version 322612 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:12,761][626795] Updated weights for policy 0, policy_version 322622 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:13,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38911.9, 300 sec: 39432.7). Total num frames: 2642968576. Throughput: 0: 9651.5. Samples: 410720694. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:13,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:14,764][626795] Updated weights for policy 0, policy_version 322632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:16,799][626795] Updated weights for policy 0, policy_version 322642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:18,738][626795] Updated weights for policy 0, policy_version 322652 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:18,975][24592] Fps is (10 sec: 40140.7, 60 sec: 39185.3, 300 sec: 39460.4). Total num frames: 2643173376. Throughput: 0: 9799.1. Samples: 410780754. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:18,977][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:20,727][626795] Updated weights for policy 0, policy_version 322662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:22,730][626795] Updated weights for policy 0, policy_version 322672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:23,975][24592] Fps is (10 sec: 40960.2, 60 sec: 39048.5, 300 sec: 39571.5). Total num frames: 2643378176. Throughput: 0: 9846.8. Samples: 410843610. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:23,977][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:24,699][626795] Updated weights for policy 0, policy_version 322682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:26,705][626795] Updated weights for policy 0, policy_version 322692 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:28,605][626795] Updated weights for policy 0, policy_version 322702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:28,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39185.1, 300 sec: 39571.5). Total num frames: 2643582976. Throughput: 0: 9894.0. Samples: 410874276. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:28,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:30,670][626795] Updated weights for policy 0, policy_version 322712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:32,697][626795] Updated weights for policy 0, policy_version 322722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:33,975][24592] Fps is (10 sec: 40959.8, 60 sec: 39731.2, 300 sec: 39654.8). Total num frames: 2643787776. Throughput: 0: 9970.5. Samples: 410934906. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:33,977][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:34,857][626795] Updated weights for policy 0, policy_version 322732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:37,460][626795] Updated weights for policy 0, policy_version 322742 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:38,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39185.1, 300 sec: 39682.6). Total num frames: 2643959808. Throughput: 0: 9790.4. Samples: 410988252. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:38,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:39,583][626795] Updated weights for policy 0, policy_version 322752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:41,637][626795] Updated weights for policy 0, policy_version 322762 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:43,690][626795] Updated weights for policy 0, policy_version 322772 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:43,976][24592] Fps is (10 sec: 36863.4, 60 sec: 39321.5, 300 sec: 39682.6). Total num frames: 2644156416. Throughput: 0: 9785.2. Samples: 411017730. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:43,978][24592] Avg episode reward: [(0, '4.611')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:45,682][626795] Updated weights for policy 0, policy_version 322782 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:47,666][626795] Updated weights for policy 0, policy_version 322792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:48,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39594.7, 300 sec: 39682.6). Total num frames: 2644361216. Throughput: 0: 9803.6. Samples: 411079038. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:48,976][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:49,628][626795] Updated weights for policy 0, policy_version 322802 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:51,657][626795] Updated weights for policy 0, policy_version 322812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:53,536][626795] Updated weights for policy 0, policy_version 322822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:53,975][24592] Fps is (10 sec: 40961.2, 60 sec: 39458.2, 300 sec: 39683.8). Total num frames: 2644566016. Throughput: 0: 9998.5. Samples: 411140952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:53,976][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:55,668][626795] Updated weights for policy 0, policy_version 322832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:57,637][626795] Updated weights for policy 0, policy_version 322842 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:58,976][24592] Fps is (10 sec: 41778.4, 60 sec: 39731.1, 300 sec: 39710.3). Total num frames: 2644779008. Throughput: 0: 10004.5. Samples: 411170898. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:12:58,978][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:12:59,631][626795] Updated weights for policy 0, policy_version 322852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:01,646][626795] Updated weights for policy 0, policy_version 322862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:03,600][626795] Updated weights for policy 0, policy_version 322872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:03,975][24592] Fps is (10 sec: 40959.3, 60 sec: 40140.8, 300 sec: 39682.6). Total num frames: 2644975616. Throughput: 0: 10036.5. Samples: 411232398. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:03,977][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000322873_2644975616.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:04,129][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000321710_2635448320.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:05,639][626795] Updated weights for policy 0, policy_version 322882 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:07,649][626795] Updated weights for policy 0, policy_version 322892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:09,367][24592] Fps is (10 sec: 37053.1, 60 sec: 39609.4, 300 sec: 39657.8). Total num frames: 2645164032. Throughput: 0: 9891.5. Samples: 411292596. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:09,369][24592] Avg episode reward: [(0, '4.975')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:10,450][626795] Updated weights for policy 0, policy_version 322902 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:12,620][626795] Updated weights for policy 0, policy_version 322912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:13,976][24592] Fps is (10 sec: 36863.3, 60 sec: 39594.5, 300 sec: 39654.8). Total num frames: 2645344256. Throughput: 0: 9785.8. Samples: 411314640. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:13,979][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:14,708][626795] Updated weights for policy 0, policy_version 322922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:16,624][626795] Updated weights for policy 0, policy_version 322932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:18,618][626795] Updated weights for policy 0, policy_version 322942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:18,976][24592] Fps is (10 sec: 40922.0, 60 sec: 39731.0, 300 sec: 39682.6). Total num frames: 2645557248. Throughput: 0: 9789.8. Samples: 411375450. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:18,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:20,582][626795] Updated weights for policy 0, policy_version 322952 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:22,567][626795] Updated weights for policy 0, policy_version 322962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:23,975][24592] Fps is (10 sec: 41780.4, 60 sec: 39731.2, 300 sec: 39682.6). Total num frames: 2645762048. Throughput: 0: 10002.1. Samples: 411438348. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:23,978][24592] Avg episode reward: [(0, '4.417')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:24,658][626795] Updated weights for policy 0, policy_version 322972 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:26,436][626795] Updated weights for policy 0, policy_version 322982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:28,454][626795] Updated weights for policy 0, policy_version 322992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:28,975][24592] Fps is (10 sec: 40960.5, 60 sec: 39731.1, 300 sec: 39710.4). Total num frames: 2645966848. Throughput: 0: 10042.6. Samples: 411469644. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:28,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:30,559][626795] Updated weights for policy 0, policy_version 323002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:32,392][626795] Updated weights for policy 0, policy_version 323012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39731.2, 300 sec: 39710.4). Total num frames: 2646171648. Throughput: 0: 10050.3. Samples: 411531300. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:33,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:34,500][626795] Updated weights for policy 0, policy_version 323022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:36,545][626795] Updated weights for policy 0, policy_version 323032 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:38,482][626795] Updated weights for policy 0, policy_version 323042 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:38,975][24592] Fps is (10 sec: 40141.5, 60 sec: 40140.8, 300 sec: 39682.8). Total num frames: 2646368256. Throughput: 0: 10029.2. Samples: 411592266. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:38,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:40,571][626795] Updated weights for policy 0, policy_version 323052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:43,270][626795] Updated weights for policy 0, policy_version 323062 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:43,975][24592] Fps is (10 sec: 37683.1, 60 sec: 39867.8, 300 sec: 39710.4). Total num frames: 2646548480. Throughput: 0: 10025.6. Samples: 411622050. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:43,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:45,322][626795] Updated weights for policy 0, policy_version 323072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:47,411][626795] Updated weights for policy 0, policy_version 323082 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:48,976][24592] Fps is (10 sec: 38501.5, 60 sec: 39867.6, 300 sec: 39710.3). Total num frames: 2646753280. Throughput: 0: 9856.3. Samples: 411675930. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:48,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:49,382][626795] Updated weights for policy 0, policy_version 323092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:51,339][626795] Updated weights for policy 0, policy_version 323102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:53,172][626795] Updated weights for policy 0, policy_version 323112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:53,975][24592] Fps is (10 sec: 40960.3, 60 sec: 39867.7, 300 sec: 39710.4). Total num frames: 2646958080. Throughput: 0: 9976.0. Samples: 411737610. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:53,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:55,244][626795] Updated weights for policy 0, policy_version 323122 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:57,373][626795] Updated weights for policy 0, policy_version 323132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:58,975][24592] Fps is (10 sec: 40960.9, 60 sec: 39731.4, 300 sec: 39710.4). Total num frames: 2647162880. Throughput: 0: 10079.8. Samples: 411768228. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:13:58,976][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:13:59,455][626795] Updated weights for policy 0, policy_version 323142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:01,446][626795] Updated weights for policy 0, policy_version 323152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:03,392][626795] Updated weights for policy 0, policy_version 323162 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:03,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39731.3, 300 sec: 39710.4). Total num frames: 2647359488. Throughput: 0: 10071.7. Samples: 411828672. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:03,978][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:05,447][626795] Updated weights for policy 0, policy_version 323172 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:07,450][626795] Updated weights for policy 0, policy_version 323182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:08,976][24592] Fps is (10 sec: 40138.3, 60 sec: 40266.5, 300 sec: 39710.3). Total num frames: 2647564288. Throughput: 0: 10041.2. Samples: 411890208. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:08,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:09,369][626795] Updated weights for policy 0, policy_version 323192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:11,367][626795] Updated weights for policy 0, policy_version 323202 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:13,405][626795] Updated weights for policy 0, policy_version 323212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:13,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40413.9, 300 sec: 39821.5). Total num frames: 2647769088. Throughput: 0: 10027.5. Samples: 411920880. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:13,976][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:16,158][626795] Updated weights for policy 0, policy_version 323222 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:18,143][626795] Updated weights for policy 0, policy_version 323232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:18,975][24592] Fps is (10 sec: 38504.1, 60 sec: 39867.8, 300 sec: 39738.1). Total num frames: 2647949312. Throughput: 0: 9858.5. Samples: 411974934. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:18,978][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:20,151][626795] Updated weights for policy 0, policy_version 323242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:22,067][626795] Updated weights for policy 0, policy_version 323252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:23,978][24592] Fps is (10 sec: 38491.2, 60 sec: 39865.7, 300 sec: 39737.7). Total num frames: 2648154112. Throughput: 0: 9870.8. Samples: 412036482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:23,979][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:24,204][626795] Updated weights for policy 0, policy_version 323262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:26,168][626795] Updated weights for policy 0, policy_version 323272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:28,198][626795] Updated weights for policy 0, policy_version 323282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:28,975][24592] Fps is (10 sec: 40960.5, 60 sec: 39867.8, 300 sec: 39738.2). Total num frames: 2648358912. Throughput: 0: 9882.9. Samples: 412066782. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:28,977][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:30,153][626795] Updated weights for policy 0, policy_version 323292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:32,089][626795] Updated weights for policy 0, policy_version 323302 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:33,975][24592] Fps is (10 sec: 40972.7, 60 sec: 39867.8, 300 sec: 39738.1). Total num frames: 2648563712. Throughput: 0: 10054.0. Samples: 412128360. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:33,977][24592] Avg episode reward: [(0, '4.782')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:34,267][626795] Updated weights for policy 0, policy_version 323312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:36,110][626795] Updated weights for policy 0, policy_version 323322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:38,139][626795] Updated weights for policy 0, policy_version 323332 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:38,975][24592] Fps is (10 sec: 40141.0, 60 sec: 39867.7, 300 sec: 39710.4). Total num frames: 2648760320. Throughput: 0: 10035.6. Samples: 412189212. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:38,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:40,216][626795] Updated weights for policy 0, policy_version 323342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:42,198][626795] Updated weights for policy 0, policy_version 323352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:43,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40413.8, 300 sec: 39738.1). Total num frames: 2648973312. Throughput: 0: 10038.5. Samples: 412219962. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:43,979][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:44,171][626795] Updated weights for policy 0, policy_version 323362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:46,208][626795] Updated weights for policy 0, policy_version 323372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,584][626772] Signal inference workers to stop experience collection... (5350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,585][626772] Signal inference workers to resume experience collection... (5350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,597][626795] InferenceWorker_p0-w0: stopping experience collection (5350 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,598][626795] InferenceWorker_p0-w0: resuming experience collection (5350 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:48,782][626795] Updated weights for policy 0, policy_version 323382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,975][24592] Fps is (10 sec: 38502.2, 60 sec: 39867.8, 300 sec: 39738.4). Total num frames: 2649145344. Throughput: 0: 10039.2. Samples: 412280436. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:48,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:50,887][626795] Updated weights for policy 0, policy_version 323392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:52,877][626795] Updated weights for policy 0, policy_version 323402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:53,975][24592] Fps is (10 sec: 37683.5, 60 sec: 39867.7, 300 sec: 39738.2). Total num frames: 2649350144. Throughput: 0: 9890.3. Samples: 412335264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:53,978][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:55,035][626795] Updated weights for policy 0, policy_version 323412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:56,922][626795] Updated weights for policy 0, policy_version 323422 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:14:58,891][626795] Updated weights for policy 0, policy_version 323432 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:58,975][24592] Fps is (10 sec: 40959.7, 60 sec: 39867.6, 300 sec: 39738.2). Total num frames: 2649554944. Throughput: 0: 9898.7. Samples: 412366320. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:14:58,976][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:00,876][626795] Updated weights for policy 0, policy_version 323442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:02,931][626795] Updated weights for policy 0, policy_version 323452 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:03,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40004.2, 300 sec: 39738.1). Total num frames: 2649759744. Throughput: 0: 10079.1. Samples: 412428492. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:03,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000323457_2649759744.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:04,134][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000322293_2640224256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:05,005][626795] Updated weights for policy 0, policy_version 323462 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:06,845][626795] Updated weights for policy 0, policy_version 323472 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:08,765][626795] Updated weights for policy 0, policy_version 323482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:08,976][24592] Fps is (10 sec: 41776.0, 60 sec: 40140.6, 300 sec: 39765.8). Total num frames: 2649972736. Throughput: 0: 10080.2. Samples: 412490070. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:08,978][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:10,882][626795] Updated weights for policy 0, policy_version 323492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:12,888][626795] Updated weights for policy 0, policy_version 323502 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40004.4, 300 sec: 39738.2). Total num frames: 2650169344. Throughput: 0: 10077.3. Samples: 412520262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:13,976][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:14,919][626795] Updated weights for policy 0, policy_version 323512 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:16,854][626795] Updated weights for policy 0, policy_version 323522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:18,838][626795] Updated weights for policy 0, policy_version 323532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:18,975][24592] Fps is (10 sec: 40144.3, 60 sec: 40413.9, 300 sec: 39822.8). Total num frames: 2650374144. Throughput: 0: 10077.3. Samples: 412581840. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:18,977][24592] Avg episode reward: [(0, '4.806')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:21,581][626795] Updated weights for policy 0, policy_version 323542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:23,553][626795] Updated weights for policy 0, policy_version 323552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:23,975][24592] Fps is (10 sec: 37683.4, 60 sec: 39869.8, 300 sec: 39738.2). Total num frames: 2650546176. Throughput: 0: 9926.9. Samples: 412635924. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:23,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:25,592][626795] Updated weights for policy 0, policy_version 323562 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:27,626][626795] Updated weights for policy 0, policy_version 323572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:28,975][24592] Fps is (10 sec: 38502.2, 60 sec: 40004.2, 300 sec: 39765.9). Total num frames: 2650759168. Throughput: 0: 9919.5. Samples: 412666338. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:28,977][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:29,602][626795] Updated weights for policy 0, policy_version 323582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:31,595][626795] Updated weights for policy 0, policy_version 323592 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:33,448][626795] Updated weights for policy 0, policy_version 323602 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40004.3, 300 sec: 39765.9). Total num frames: 2650963968. Throughput: 0: 9970.1. Samples: 412729092. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:33,977][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:35,438][626795] Updated weights for policy 0, policy_version 323612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:37,476][626795] Updated weights for policy 0, policy_version 323622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:38,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40140.8, 300 sec: 39766.0). Total num frames: 2651168768. Throughput: 0: 10128.8. Samples: 412791060. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:38,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:39,492][626795] Updated weights for policy 0, policy_version 323632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:41,425][626795] Updated weights for policy 0, policy_version 323642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:43,382][626795] Updated weights for policy 0, policy_version 323652 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:43,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40004.4, 300 sec: 39738.1). Total num frames: 2651373568. Throughput: 0: 10109.8. Samples: 412821258. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:43,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:45,475][626795] Updated weights for policy 0, policy_version 323662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:47,454][626795] Updated weights for policy 0, policy_version 323672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:48,975][24592] Fps is (10 sec: 41778.7, 60 sec: 40686.9, 300 sec: 39765.9). Total num frames: 2651586560. Throughput: 0: 10102.4. Samples: 412883100. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:48,977][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:49,554][626795] Updated weights for policy 0, policy_version 323682 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:51,517][626795] Updated weights for policy 0, policy_version 323692 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:54,117][24592] Fps is (10 sec: 37964.6, 60 sec: 40046.3, 300 sec: 39774.6). Total num frames: 2651758592. Throughput: 0: 9348.2. Samples: 412912056. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:54,118][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:54,308][626795] Updated weights for policy 0, policy_version 323702 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:56,371][626795] Updated weights for policy 0, policy_version 323712 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:15:58,405][626795] Updated weights for policy 0, policy_version 323722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:58,975][24592] Fps is (10 sec: 36864.4, 60 sec: 40004.4, 300 sec: 39765.9). Total num frames: 2651955200. Throughput: 0: 9908.9. Samples: 412966164. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:15:58,977][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:00,487][626795] Updated weights for policy 0, policy_version 323732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:02,548][626795] Updated weights for policy 0, policy_version 323742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:03,975][24592] Fps is (10 sec: 39886.2, 60 sec: 39867.7, 300 sec: 39738.1). Total num frames: 2652151808. Throughput: 0: 9873.9. Samples: 413026164. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:03,976][24592] Avg episode reward: [(0, '4.863')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:04,570][626795] Updated weights for policy 0, policy_version 323752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:06,490][626795] Updated weights for policy 0, policy_version 323762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:08,469][626795] Updated weights for policy 0, policy_version 323772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:08,975][24592] Fps is (10 sec: 40140.7, 60 sec: 39731.8, 300 sec: 39738.1). Total num frames: 2652356608. Throughput: 0: 10056.0. Samples: 413088444. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:08,977][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:10,333][626795] Updated weights for policy 0, policy_version 323782 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:12,300][626795] Updated weights for policy 0, policy_version 323792 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:13,975][24592] Fps is (10 sec: 41779.6, 60 sec: 40004.3, 300 sec: 39821.5). Total num frames: 2652569600. Throughput: 0: 10068.2. Samples: 413119404. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:13,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:14,297][626795] Updated weights for policy 0, policy_version 323802 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:16,197][626795] Updated weights for policy 0, policy_version 323812 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:18,190][626795] Updated weights for policy 0, policy_version 323822 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:18,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40004.3, 300 sec: 39793.7). Total num frames: 2652774400. Throughput: 0: 10074.8. Samples: 413182458. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:18,977][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:20,173][626795] Updated weights for policy 0, policy_version 323832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:22,061][626795] Updated weights for policy 0, policy_version 323842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:23,975][24592] Fps is (10 sec: 41778.9, 60 sec: 40686.9, 300 sec: 39849.2). Total num frames: 2652987392. Throughput: 0: 10089.3. Samples: 413245080. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:23,979][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:24,093][626795] Updated weights for policy 0, policy_version 323852 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:26,065][626795] Updated weights for policy 0, policy_version 323862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:28,601][626795] Updated weights for policy 0, policy_version 323872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:28,976][24592] Fps is (10 sec: 39319.6, 60 sec: 40140.5, 300 sec: 39876.9). Total num frames: 2653167616. Throughput: 0: 10051.5. Samples: 413273580. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:28,982][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:30,610][626795] Updated weights for policy 0, policy_version 323882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:32,531][626795] Updated weights for policy 0, policy_version 323892 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:33,975][24592] Fps is (10 sec: 39321.8, 60 sec: 40277.4, 300 sec: 39904.8). Total num frames: 2653380608. Throughput: 0: 9993.0. Samples: 413332782. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:33,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:34,619][626795] Updated weights for policy 0, policy_version 323902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:36,460][626795] Updated weights for policy 0, policy_version 323912 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:38,425][626795] Updated weights for policy 0, policy_version 323922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:38,975][24592] Fps is (10 sec: 41781.3, 60 sec: 40277.3, 300 sec: 39960.3). Total num frames: 2653585408. Throughput: 0: 10774.0. Samples: 413395362. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:38,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:40,491][626795] Updated weights for policy 0, policy_version 323932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:42,308][626795] Updated weights for policy 0, policy_version 323942 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:43,975][24592] Fps is (10 sec: 40960.1, 60 sec: 40277.3, 300 sec: 40015.8). Total num frames: 2653790208. Throughput: 0: 10207.1. Samples: 413425482. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:43,976][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:44,364][626795] Updated weights for policy 0, policy_version 323952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:46,468][626795] Updated weights for policy 0, policy_version 323962 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:48,205][626795] Updated weights for policy 0, policy_version 323972 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:48,976][24592] Fps is (10 sec: 41777.0, 60 sec: 40277.0, 300 sec: 40015.8). Total num frames: 2654003200. Throughput: 0: 10271.9. Samples: 413488404. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:48,978][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:50,302][626795] Updated weights for policy 0, policy_version 323982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:52,189][626795] Updated weights for policy 0, policy_version 323992 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:53,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41056.9, 300 sec: 40071.4). Total num frames: 2654216192. Throughput: 0: 10298.3. Samples: 413551866. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:53,976][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:54,196][626795] Updated weights for policy 0, policy_version 324002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:56,159][626795] Updated weights for policy 0, policy_version 324012 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:16:58,132][626795] Updated weights for policy 0, policy_version 324022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:58,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.2, 300 sec: 40182.4). Total num frames: 2654420992. Throughput: 0: 10282.3. Samples: 413582112. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:16:58,977][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:00,841][626795] Updated weights for policy 0, policy_version 324032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:02,733][626795] Updated weights for policy 0, policy_version 324042 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:03,975][24592] Fps is (10 sec: 38502.5, 60 sec: 40823.5, 300 sec: 40099.1). Total num frames: 2654601216. Throughput: 0: 10116.9. Samples: 413637720. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:03,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000324048_2654601216.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:04,130][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000322873_2644975616.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:04,851][626795] Updated weights for policy 0, policy_version 324052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:06,810][626795] Updated weights for policy 0, policy_version 324062 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:08,650][626795] Updated weights for policy 0, policy_version 324072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:08,977][24592] Fps is (10 sec: 38499.0, 60 sec: 40822.6, 300 sec: 40126.7). Total num frames: 2654806016. Throughput: 0: 10104.9. Samples: 413699814. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:08,978][24592] Avg episode reward: [(0, '4.856')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:10,704][626795] Updated weights for policy 0, policy_version 324082 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:12,650][626795] Updated weights for policy 0, policy_version 324092 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:13,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40686.9, 300 sec: 40126.9). Total num frames: 2655010816. Throughput: 0: 10129.2. Samples: 413729388. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:13,978][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:14,816][626795] Updated weights for policy 0, policy_version 324102 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:16,891][626795] Updated weights for policy 0, policy_version 324112 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:18,976][24592] Fps is (10 sec: 38506.0, 60 sec: 40277.1, 300 sec: 40043.6). Total num frames: 2655191040. Throughput: 0: 10107.8. Samples: 413787636. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:18,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:19,340][626795] Updated weights for policy 0, policy_version 324122 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:21,453][626795] Updated weights for policy 0, policy_version 324132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:23,499][626795] Updated weights for policy 0, policy_version 324142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:23,976][24592] Fps is (10 sec: 37682.5, 60 sec: 40004.2, 300 sec: 40015.8). Total num frames: 2655387648. Throughput: 0: 10001.8. Samples: 413845446. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:23,976][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:25,441][626795] Updated weights for policy 0, policy_version 324152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:27,551][626795] Updated weights for policy 0, policy_version 324162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:28,975][24592] Fps is (10 sec: 40142.5, 60 sec: 40414.2, 300 sec: 40015.8). Total num frames: 2655592448. Throughput: 0: 9985.9. Samples: 413874846. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:28,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:29,629][626795] Updated weights for policy 0, policy_version 324172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:31,555][626795] Updated weights for policy 0, policy_version 324182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:33,975][24592] Fps is (10 sec: 37683.7, 60 sec: 39731.2, 300 sec: 40015.8). Total num frames: 2655764480. Throughput: 0: 9841.0. Samples: 413931246. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:33,976][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:34,313][626795] Updated weights for policy 0, policy_version 324192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:36,361][626795] Updated weights for policy 0, policy_version 324202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:38,260][626795] Updated weights for policy 0, policy_version 324212 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:38,976][24592] Fps is (10 sec: 37682.4, 60 sec: 39731.1, 300 sec: 40043.6). Total num frames: 2655969280. Throughput: 0: 9744.5. Samples: 413990370. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:38,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:40,478][626795] Updated weights for policy 0, policy_version 324222 (0.0039)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:42,635][626795] Updated weights for policy 0, policy_version 324232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:43,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39458.1, 300 sec: 39988.1). Total num frames: 2656157696. Throughput: 0: 9692.4. Samples: 414018264. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:43,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:44,678][626795] Updated weights for policy 0, policy_version 324242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:46,762][626795] Updated weights for policy 0, policy_version 324252 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:48,685][626795] Updated weights for policy 0, policy_version 324262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:48,975][24592] Fps is (10 sec: 39322.3, 60 sec: 39321.9, 300 sec: 39988.1). Total num frames: 2656362496. Throughput: 0: 9801.2. Samples: 414078774. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:48,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:50,749][626795] Updated weights for policy 0, policy_version 324272 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:52,678][626795] Updated weights for policy 0, policy_version 324282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:53,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39185.1, 300 sec: 39960.3). Total num frames: 2656567296. Throughput: 0: 9780.8. Samples: 414139938. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:53,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:54,751][626795] Updated weights for policy 0, policy_version 324292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:56,681][626795] Updated weights for policy 0, policy_version 324302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:17:58,788][626795] Updated weights for policy 0, policy_version 324312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:58,975][24592] Fps is (10 sec: 40960.3, 60 sec: 39185.4, 300 sec: 39988.1). Total num frames: 2656772096. Throughput: 0: 9795.6. Samples: 414170190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:17:58,977][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:00,717][626795] Updated weights for policy 0, policy_version 324322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:02,762][626795] Updated weights for policy 0, policy_version 324332 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:03,975][24592] Fps is (10 sec: 40140.4, 60 sec: 39458.1, 300 sec: 40069.0). Total num frames: 2656968704. Throughput: 0: 9863.8. Samples: 414231504. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:03,976][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:04,909][626795] Updated weights for policy 0, policy_version 324342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:07,506][626795] Updated weights for policy 0, policy_version 324352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:08,976][24592] Fps is (10 sec: 37681.9, 60 sec: 39049.2, 300 sec: 40015.8). Total num frames: 2657148928. Throughput: 0: 9785.0. Samples: 414285774. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:08,978][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:09,545][626795] Updated weights for policy 0, policy_version 324362 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:11,529][626795] Updated weights for policy 0, policy_version 324372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:13,541][626795] Updated weights for policy 0, policy_version 324382 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:13,975][24592] Fps is (10 sec: 37683.3, 60 sec: 38912.0, 300 sec: 39960.3). Total num frames: 2657345536. Throughput: 0: 9798.5. Samples: 414315780. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:13,976][24592] Avg episode reward: [(0, '4.917')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:15,609][626795] Updated weights for policy 0, policy_version 324392 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:17,680][626795] Updated weights for policy 0, policy_version 324402 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:18,975][24592] Fps is (10 sec: 40141.5, 60 sec: 39321.8, 300 sec: 39960.3). Total num frames: 2657550336. Throughput: 0: 9880.4. Samples: 414375864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:18,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:19,698][626795] Updated weights for policy 0, policy_version 324412 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:21,702][626795] Updated weights for policy 0, policy_version 324422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:23,550][626795] Updated weights for policy 0, policy_version 324432 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:23,975][24592] Fps is (10 sec: 40960.2, 60 sec: 39458.2, 300 sec: 39960.3). Total num frames: 2657755136. Throughput: 0: 9934.4. Samples: 414437418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:23,976][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:25,665][626795] Updated weights for policy 0, policy_version 324442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:27,640][626795] Updated weights for policy 0, policy_version 324452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:28,976][24592] Fps is (10 sec: 40957.1, 60 sec: 39457.6, 300 sec: 39960.2). Total num frames: 2657959936. Throughput: 0: 9985.3. Samples: 414467610. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:28,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:29,736][626795] Updated weights for policy 0, policy_version 324462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:31,752][626795] Updated weights for policy 0, policy_version 324472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:33,586][626795] Updated weights for policy 0, policy_version 324482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:33,976][24592] Fps is (10 sec: 40958.9, 60 sec: 40004.1, 300 sec: 39988.0). Total num frames: 2658164736. Throughput: 0: 10009.3. Samples: 414529194. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:33,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:35,701][626795] Updated weights for policy 0, policy_version 324492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:37,793][626795] Updated weights for policy 0, policy_version 324502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:38,975][24592] Fps is (10 sec: 37686.1, 60 sec: 39458.2, 300 sec: 39960.3). Total num frames: 2658336768. Throughput: 0: 9865.3. Samples: 414583878. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:38,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:40,466][626795] Updated weights for policy 0, policy_version 324512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:42,441][626795] Updated weights for policy 0, policy_version 324522 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:43,975][24592] Fps is (10 sec: 37684.0, 60 sec: 39731.2, 300 sec: 39960.3). Total num frames: 2658541568. Throughput: 0: 9838.9. Samples: 414612942. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:43,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:44,477][626795] Updated weights for policy 0, policy_version 324532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:46,535][626795] Updated weights for policy 0, policy_version 324542 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:48,506][626795] Updated weights for policy 0, policy_version 324552 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:48,976][24592] Fps is (10 sec: 40959.3, 60 sec: 39731.1, 300 sec: 39960.3). Total num frames: 2658746368. Throughput: 0: 9846.6. Samples: 414674604. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:48,977][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:50,575][626795] Updated weights for policy 0, policy_version 324562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:52,508][626795] Updated weights for policy 0, policy_version 324572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:53,975][24592] Fps is (10 sec: 40959.8, 60 sec: 39731.1, 300 sec: 39960.3). Total num frames: 2658951168. Throughput: 0: 9999.0. Samples: 414735726. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:53,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:54,565][626795] Updated weights for policy 0, policy_version 324582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:56,584][626795] Updated weights for policy 0, policy_version 324592 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:18:58,515][626795] Updated weights for policy 0, policy_version 324602 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:58,975][24592] Fps is (10 sec: 40960.4, 60 sec: 39731.1, 300 sec: 39988.1). Total num frames: 2659155968. Throughput: 0: 10009.9. Samples: 414766224. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:18:58,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:00,468][626795] Updated weights for policy 0, policy_version 324612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:02,499][626795] Updated weights for policy 0, policy_version 324622 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:03,976][24592] Fps is (10 sec: 40959.2, 60 sec: 39867.6, 300 sec: 39988.1). Total num frames: 2659360768. Throughput: 0: 10041.2. Samples: 414827718. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:03,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000324629_2659360768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:04,147][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000323457_2649759744.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:04,675][626795] Updated weights for policy 0, policy_version 324632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:06,673][626795] Updated weights for policy 0, policy_version 324642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:08,609][626795] Updated weights for policy 0, policy_version 324652 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:08,975][24592] Fps is (10 sec: 40141.4, 60 sec: 40141.0, 300 sec: 39960.3). Total num frames: 2659557376. Throughput: 0: 10015.7. Samples: 414888126. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:08,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:10,633][626795] Updated weights for policy 0, policy_version 324662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:13,399][626795] Updated weights for policy 0, policy_version 324672 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:13,975][24592] Fps is (10 sec: 36865.0, 60 sec: 39731.2, 300 sec: 39932.5). Total num frames: 2659729408. Throughput: 0: 9857.5. Samples: 414911190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:13,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:15,398][626795] Updated weights for policy 0, policy_version 324682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:17,364][626795] Updated weights for policy 0, policy_version 324692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:18,975][24592] Fps is (10 sec: 38502.3, 60 sec: 39867.8, 300 sec: 39960.7). Total num frames: 2659942400. Throughput: 0: 9860.1. Samples: 414972894. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:18,977][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:19,420][626795] Updated weights for policy 0, policy_version 324702 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:21,393][626795] Updated weights for policy 0, policy_version 324712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:23,352][626795] Updated weights for policy 0, policy_version 324722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:23,975][24592] Fps is (10 sec: 40960.4, 60 sec: 39731.2, 300 sec: 39932.5). Total num frames: 2660139008. Throughput: 0: 10006.7. Samples: 415034178. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:23,976][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:25,409][626795] Updated weights for policy 0, policy_version 324732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:27,464][626795] Updated weights for policy 0, policy_version 324742 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:28,975][24592] Fps is (10 sec: 39321.4, 60 sec: 39595.2, 300 sec: 39904.8). Total num frames: 2660335616. Throughput: 0: 10025.7. Samples: 415064100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:28,977][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:29,857][626795] Updated weights for policy 0, policy_version 324752 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:32,229][626795] Updated weights for policy 0, policy_version 324762 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:33,976][24592] Fps is (10 sec: 37682.5, 60 sec: 39185.2, 300 sec: 39849.2). Total num frames: 2660515840. Throughput: 0: 9852.9. Samples: 415117986. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:33,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:34,469][626795] Updated weights for policy 0, policy_version 324772 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:36,705][626795] Updated weights for policy 0, policy_version 324782 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:38,844][626795] Updated weights for policy 0, policy_version 324792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:38,976][24592] Fps is (10 sec: 36041.9, 60 sec: 39321.1, 300 sec: 39738.0). Total num frames: 2660696064. Throughput: 0: 9701.8. Samples: 415172316. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:38,978][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:40,917][626795] Updated weights for policy 0, policy_version 324802 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:42,902][626795] Updated weights for policy 0, policy_version 324812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:44,523][24592] Fps is (10 sec: 35727.3, 60 sec: 38830.7, 300 sec: 39747.7). Total num frames: 2660892672. Throughput: 0: 9579.8. Samples: 415202562. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:44,525][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:45,562][626795] Updated weights for policy 0, policy_version 324822 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:47,568][626795] Updated weights for policy 0, policy_version 324832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:48,976][24592] Fps is (10 sec: 38502.4, 60 sec: 38911.6, 300 sec: 39765.8). Total num frames: 2661081088. Throughput: 0: 9547.4. Samples: 415257354. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:48,979][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:49,669][626795] Updated weights for policy 0, policy_version 324842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:51,557][626795] Updated weights for policy 0, policy_version 324852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:53,552][626795] Updated weights for policy 0, policy_version 324862 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:53,976][24592] Fps is (10 sec: 40730.8, 60 sec: 38775.2, 300 sec: 39738.1). Total num frames: 2661277696. Throughput: 0: 9575.6. Samples: 415319034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:53,979][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:55,620][626795] Updated weights for policy 0, policy_version 324872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:57,681][626795] Updated weights for policy 0, policy_version 324882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:58,975][24592] Fps is (10 sec: 40144.1, 60 sec: 38775.5, 300 sec: 39738.1). Total num frames: 2661482496. Throughput: 0: 9726.7. Samples: 415348890. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:19:58,976][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:19:59,726][626795] Updated weights for policy 0, policy_version 324892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:01,728][626795] Updated weights for policy 0, policy_version 324902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:03,700][626795] Updated weights for policy 0, policy_version 324912 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:03,975][24592] Fps is (10 sec: 40962.0, 60 sec: 38775.7, 300 sec: 39710.5). Total num frames: 2661687296. Throughput: 0: 9707.3. Samples: 415409724. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:03,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:05,614][626795] Updated weights for policy 0, policy_version 324922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:07,644][626795] Updated weights for policy 0, policy_version 324932 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:08,975][24592] Fps is (10 sec: 40960.5, 60 sec: 38912.0, 300 sec: 39738.2). Total num frames: 2661892096. Throughput: 0: 9722.7. Samples: 415471698. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:08,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:09,677][626795] Updated weights for policy 0, policy_version 324942 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:11,721][626795] Updated weights for policy 0, policy_version 324952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:13,648][626795] Updated weights for policy 0, policy_version 324962 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:13,976][24592] Fps is (10 sec: 40959.1, 60 sec: 39458.0, 300 sec: 39738.1). Total num frames: 2662096896. Throughput: 0: 9730.2. Samples: 415501962. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:13,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:15,730][626795] Updated weights for policy 0, policy_version 324972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:18,435][626795] Updated weights for policy 0, policy_version 324982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:18,976][24592] Fps is (10 sec: 37681.9, 60 sec: 38775.3, 300 sec: 39738.1). Total num frames: 2662268928. Throughput: 0: 9731.3. Samples: 415555896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:18,977][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:20,401][626795] Updated weights for policy 0, policy_version 324992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:22,427][626795] Updated weights for policy 0, policy_version 325002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:23,975][24592] Fps is (10 sec: 37683.5, 60 sec: 38911.9, 300 sec: 39710.4). Total num frames: 2662473728. Throughput: 0: 9895.4. Samples: 415617600. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:23,976][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:24,469][626795] Updated weights for policy 0, policy_version 325012 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:26,389][626795] Updated weights for policy 0, policy_version 325022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:28,374][626795] Updated weights for policy 0, policy_version 325032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:28,975][24592] Fps is (10 sec: 41780.1, 60 sec: 39185.1, 300 sec: 39738.1). Total num frames: 2662686720. Throughput: 0: 10030.2. Samples: 415648428. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:28,977][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:30,414][626795] Updated weights for policy 0, policy_version 325042 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:32,390][626795] Updated weights for policy 0, policy_version 325052 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:33,975][24592] Fps is (10 sec: 40960.8, 60 sec: 39458.3, 300 sec: 39710.4). Total num frames: 2662883328. Throughput: 0: 10051.5. Samples: 415709664. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:33,976][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:34,436][626795] Updated weights for policy 0, policy_version 325062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:36,472][626795] Updated weights for policy 0, policy_version 325072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:38,391][626795] Updated weights for policy 0, policy_version 325082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:38,976][24592] Fps is (10 sec: 40139.4, 60 sec: 39868.0, 300 sec: 39710.3). Total num frames: 2663088128. Throughput: 0: 10033.6. Samples: 415770546. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:38,977][24592] Avg episode reward: [(0, '4.454')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:40,825][626795] Updated weights for policy 0, policy_version 325092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:43,065][626795] Updated weights for policy 0, policy_version 325102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:43,975][24592] Fps is (10 sec: 38501.8, 60 sec: 39959.3, 300 sec: 39599.3). Total num frames: 2663268352. Throughput: 0: 9950.7. Samples: 415796670. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:43,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:45,138][626795] Updated weights for policy 0, policy_version 325112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:47,103][626795] Updated weights for policy 0, policy_version 325122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:48,976][24592] Fps is (10 sec: 38503.4, 60 sec: 39868.2, 300 sec: 39729.4). Total num frames: 2663473152. Throughput: 0: 9922.1. Samples: 415856220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:48,978][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:49,120][626795] Updated weights for policy 0, policy_version 325132 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:51,871][626795] Updated weights for policy 0, policy_version 325142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:53,868][626795] Updated weights for policy 0, policy_version 325152 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:53,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39458.4, 300 sec: 39627.1). Total num frames: 2663645184. Throughput: 0: 9738.2. Samples: 415909920. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:53,977][24592] Avg episode reward: [(0, '4.363')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:55,966][626795] Updated weights for policy 0, policy_version 325162 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:20:57,953][626795] Updated weights for policy 0, policy_version 325172 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:58,976][24592] Fps is (10 sec: 37682.3, 60 sec: 39457.9, 300 sec: 39654.8). Total num frames: 2663849984. Throughput: 0: 9745.2. Samples: 415940496. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:20:58,977][24592] Avg episode reward: [(0, '4.451')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:00,050][626795] Updated weights for policy 0, policy_version 325182 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:01,959][626795] Updated weights for policy 0, policy_version 325192 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:03,935][626795] Updated weights for policy 0, policy_version 325202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:03,976][24592] Fps is (10 sec: 40958.8, 60 sec: 39457.9, 300 sec: 39654.8). Total num frames: 2664054784. Throughput: 0: 9891.3. Samples: 416001006. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:03,977][24592] Avg episode reward: [(0, '4.494')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000325202_2664054784.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:04,110][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000324048_2654601216.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:06,089][626795] Updated weights for policy 0, policy_version 325212 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:07,947][626795] Updated weights for policy 0, policy_version 325222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:08,975][24592] Fps is (10 sec: 40142.5, 60 sec: 39321.6, 300 sec: 39599.3). Total num frames: 2664251392. Throughput: 0: 9885.0. Samples: 416062422. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:08,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:10,024][626795] Updated weights for policy 0, policy_version 325232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:12,030][626795] Updated weights for policy 0, policy_version 325242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:13,976][24592] Fps is (10 sec: 40960.0, 60 sec: 39458.0, 300 sec: 39627.0). Total num frames: 2664464384. Throughput: 0: 9864.7. Samples: 416092344. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:13,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:13,987][626795] Updated weights for policy 0, policy_version 325252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:16,048][626795] Updated weights for policy 0, policy_version 325262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:18,139][626795] Updated weights for policy 0, policy_version 325272 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:18,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39867.9, 300 sec: 39571.5). Total num frames: 2664660992. Throughput: 0: 9866.5. Samples: 416153658. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:18,977][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:19,939][626795] Updated weights for policy 0, policy_version 325282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:21,964][626795] Updated weights for policy 0, policy_version 325292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:23,975][24592] Fps is (10 sec: 37684.6, 60 sec: 39458.2, 300 sec: 39571.6). Total num frames: 2664841216. Throughput: 0: 9730.5. Samples: 416208414. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:23,976][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:24,759][626795] Updated weights for policy 0, policy_version 325302 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:26,695][626795] Updated weights for policy 0, policy_version 325312 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:28,662][626795] Updated weights for policy 0, policy_version 325322 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:28,979][24592] Fps is (10 sec: 38489.4, 60 sec: 39319.4, 300 sec: 39543.3). Total num frames: 2665046016. Throughput: 0: 9828.2. Samples: 416238972. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:28,979][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:30,682][626795] Updated weights for policy 0, policy_version 325332 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:32,769][626795] Updated weights for policy 0, policy_version 325342 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:33,975][24592] Fps is (10 sec: 40960.2, 60 sec: 39458.1, 300 sec: 39543.8). Total num frames: 2665250816. Throughput: 0: 9882.6. Samples: 416300934. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:33,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:34,735][626795] Updated weights for policy 0, policy_version 325352 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:36,684][626795] Updated weights for policy 0, policy_version 325362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:38,612][626795] Updated weights for policy 0, policy_version 325372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:38,975][24592] Fps is (10 sec: 40973.7, 60 sec: 39458.4, 300 sec: 39543.7). Total num frames: 2665455616. Throughput: 0: 10059.6. Samples: 416362602. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:38,976][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:40,717][626795] Updated weights for policy 0, policy_version 325382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:42,755][626795] Updated weights for policy 0, policy_version 325392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:43,975][24592] Fps is (10 sec: 40959.8, 60 sec: 39867.8, 300 sec: 39516.1). Total num frames: 2665660416. Throughput: 0: 10047.4. Samples: 416392626. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:43,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:44,732][626795] Updated weights for policy 0, policy_version 325402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:46,767][626795] Updated weights for policy 0, policy_version 325412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:48,744][626795] Updated weights for policy 0, policy_version 325422 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:48,979][24592] Fps is (10 sec: 41763.2, 60 sec: 40001.8, 300 sec: 39515.5). Total num frames: 2665873408. Throughput: 0: 10064.2. Samples: 416453928. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:48,986][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:50,701][626795] Updated weights for policy 0, policy_version 325432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:52,656][626795] Updated weights for policy 0, policy_version 325442 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:53,976][24592] Fps is (10 sec: 40959.0, 60 sec: 40413.7, 300 sec: 39488.2). Total num frames: 2666070016. Throughput: 0: 10082.6. Samples: 416516142. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:53,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:54,785][626795] Updated weights for policy 0, policy_version 325452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:57,489][626795] Updated weights for policy 0, policy_version 325462 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:58,975][24592] Fps is (10 sec: 36878.0, 60 sec: 39868.0, 300 sec: 39460.4). Total num frames: 2666242048. Throughput: 0: 9931.7. Samples: 416539266. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:21:58,976][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:21:59,374][626795] Updated weights for policy 0, policy_version 325472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:01,464][626795] Updated weights for policy 0, policy_version 325482 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:03,438][626795] Updated weights for policy 0, policy_version 325492 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:03,975][24592] Fps is (10 sec: 38503.5, 60 sec: 40004.5, 300 sec: 39488.4). Total num frames: 2666455040. Throughput: 0: 9935.5. Samples: 416600754. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:03,977][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:05,495][626795] Updated weights for policy 0, policy_version 325502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:07,416][626795] Updated weights for policy 0, policy_version 325512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:08,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40140.8, 300 sec: 39488.2). Total num frames: 2666659840. Throughput: 0: 10080.5. Samples: 416662038. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:08,976][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:09,421][626795] Updated weights for policy 0, policy_version 325522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:11,436][626795] Updated weights for policy 0, policy_version 325532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:13,418][626795] Updated weights for policy 0, policy_version 325542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:13,976][24592] Fps is (10 sec: 40139.6, 60 sec: 39867.8, 300 sec: 39543.8). Total num frames: 2666856448. Throughput: 0: 10081.2. Samples: 416692596. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:13,978][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:15,295][626795] Updated weights for policy 0, policy_version 325552 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:17,316][626772] Signal inference workers to stop experience collection... (5400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:17,319][626772] Signal inference workers to resume experience collection... (5400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:17,332][626795] InferenceWorker_p0-w0: stopping experience collection (5400 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:17,339][626795] InferenceWorker_p0-w0: resuming experience collection (5400 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:17,362][626795] Updated weights for policy 0, policy_version 325562 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:18,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40140.8, 300 sec: 39599.3). Total num frames: 2667069440. Throughput: 0: 10062.7. Samples: 416753754. Policy #0 lag: (min: 0.0, avg: 2.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:18,979][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:19,469][626795] Updated weights for policy 0, policy_version 325572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:21,597][626795] Updated weights for policy 0, policy_version 325582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:23,361][626795] Updated weights for policy 0, policy_version 325592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:23,976][24592] Fps is (10 sec: 40959.2, 60 sec: 40413.6, 300 sec: 39571.5). Total num frames: 2667266048. Throughput: 0: 10049.4. Samples: 416814828. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:23,977][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:25,487][626795] Updated weights for policy 0, policy_version 325602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:27,531][626795] Updated weights for policy 0, policy_version 325612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:29,171][24592] Fps is (10 sec: 37764.2, 60 sec: 40012.7, 300 sec: 39600.8). Total num frames: 2667454464. Throughput: 0: 10012.1. Samples: 416845128. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:29,172][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:30,173][626795] Updated weights for policy 0, policy_version 325622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:32,223][626795] Updated weights for policy 0, policy_version 325632 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:33,975][24592] Fps is (10 sec: 38504.1, 60 sec: 40004.2, 300 sec: 39599.3). Total num frames: 2667651072. Throughput: 0: 9900.8. Samples: 416899428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:33,978][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:34,228][626795] Updated weights for policy 0, policy_version 325642 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:36,228][626795] Updated weights for policy 0, policy_version 325652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:38,203][626795] Updated weights for policy 0, policy_version 325662 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:38,977][24592] Fps is (10 sec: 40939.3, 60 sec: 40004.0, 300 sec: 39654.8). Total num frames: 2667855872. Throughput: 0: 9886.0. Samples: 416961012. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:38,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:40,184][626795] Updated weights for policy 0, policy_version 325672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:42,236][626795] Updated weights for policy 0, policy_version 325682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:43,976][24592] Fps is (10 sec: 40139.1, 60 sec: 39867.5, 300 sec: 39627.0). Total num frames: 2668052480. Throughput: 0: 10041.5. Samples: 416991138. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:43,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:44,257][626795] Updated weights for policy 0, policy_version 325692 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:46,200][626795] Updated weights for policy 0, policy_version 325702 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:48,215][626795] Updated weights for policy 0, policy_version 325712 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:48,975][24592] Fps is (10 sec: 40142.6, 60 sec: 39733.7, 300 sec: 39627.1). Total num frames: 2668257280. Throughput: 0: 10041.1. Samples: 417052602. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:48,977][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:50,402][626795] Updated weights for policy 0, policy_version 325722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:52,250][626795] Updated weights for policy 0, policy_version 325732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:53,975][24592] Fps is (10 sec: 40961.4, 60 sec: 39867.8, 300 sec: 39627.0). Total num frames: 2668462080. Throughput: 0: 10044.9. Samples: 417114060. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:53,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:54,278][626795] Updated weights for policy 0, policy_version 325742 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:56,297][626795] Updated weights for policy 0, policy_version 325752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:22:58,233][626795] Updated weights for policy 0, policy_version 325762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40413.9, 300 sec: 39654.8). Total num frames: 2668666880. Throughput: 0: 10036.7. Samples: 417144246. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:22:58,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:00,269][626795] Updated weights for policy 0, policy_version 325772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:02,988][626795] Updated weights for policy 0, policy_version 325782 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:03,976][24592] Fps is (10 sec: 37683.0, 60 sec: 39731.1, 300 sec: 39627.1). Total num frames: 2668838912. Throughput: 0: 9878.8. Samples: 417198300. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:03,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000325786_2668838912.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:04,132][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000324629_2659360768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:05,132][626795] Updated weights for policy 0, policy_version 325792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:07,158][626795] Updated weights for policy 0, policy_version 325802 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:08,975][24592] Fps is (10 sec: 37683.1, 60 sec: 39731.2, 300 sec: 39654.8). Total num frames: 2669043712. Throughput: 0: 9884.8. Samples: 417259638. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:08,976][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:09,321][626795] Updated weights for policy 0, policy_version 325812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:11,192][626795] Updated weights for policy 0, policy_version 325822 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:13,145][626795] Updated weights for policy 0, policy_version 325832 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:13,976][24592] Fps is (10 sec: 40956.3, 60 sec: 39867.2, 300 sec: 39654.7). Total num frames: 2669248512. Throughput: 0: 9906.0. Samples: 417288972. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:13,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:15,133][626795] Updated weights for policy 0, policy_version 325842 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:17,143][626795] Updated weights for policy 0, policy_version 325852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:18,976][24592] Fps is (10 sec: 40958.0, 60 sec: 39730.8, 300 sec: 39654.8). Total num frames: 2669453312. Throughput: 0: 10029.6. Samples: 417350766. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:18,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:19,227][626795] Updated weights for policy 0, policy_version 325862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:21,048][626795] Updated weights for policy 0, policy_version 325872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:23,063][626795] Updated weights for policy 0, policy_version 325882 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:23,975][24592] Fps is (10 sec: 40964.4, 60 sec: 39868.0, 300 sec: 39654.9). Total num frames: 2669658112. Throughput: 0: 10028.6. Samples: 417412296. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:23,976][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:25,192][626795] Updated weights for policy 0, policy_version 325892 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:27,280][626795] Updated weights for policy 0, policy_version 325902 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:28,975][24592] Fps is (10 sec: 39323.6, 60 sec: 39998.0, 300 sec: 39599.3). Total num frames: 2669846528. Throughput: 0: 10019.6. Samples: 417442014. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:28,976][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:29,343][626795] Updated weights for policy 0, policy_version 325912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:31,640][626795] Updated weights for policy 0, policy_version 325922 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:33,597][626795] Updated weights for policy 0, policy_version 325932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:33,975][24592] Fps is (10 sec: 38502.1, 60 sec: 39867.7, 300 sec: 39682.6). Total num frames: 2670043136. Throughput: 0: 9935.9. Samples: 417499716. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:33,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:36,377][626795] Updated weights for policy 0, policy_version 325942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:38,417][626795] Updated weights for policy 0, policy_version 325952 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:38,976][24592] Fps is (10 sec: 36864.0, 60 sec: 39321.9, 300 sec: 39571.5). Total num frames: 2670215168. Throughput: 0: 9760.3. Samples: 417553272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:38,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:40,449][626795] Updated weights for policy 0, policy_version 325962 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:42,422][626795] Updated weights for policy 0, policy_version 325972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:43,976][24592] Fps is (10 sec: 38500.2, 60 sec: 39594.5, 300 sec: 39599.2). Total num frames: 2670428160. Throughput: 0: 9762.8. Samples: 417583578. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:43,979][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:44,558][626795] Updated weights for policy 0, policy_version 325982 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:46,650][626795] Updated weights for policy 0, policy_version 325992 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:48,492][626795] Updated weights for policy 0, policy_version 326002 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:48,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39321.6, 300 sec: 39543.8). Total num frames: 2670616576. Throughput: 0: 9890.7. Samples: 417643380. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:48,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:50,598][626795] Updated weights for policy 0, policy_version 326012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:52,670][626795] Updated weights for policy 0, policy_version 326022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:53,975][24592] Fps is (10 sec: 39323.7, 60 sec: 39321.6, 300 sec: 39543.8). Total num frames: 2670821376. Throughput: 0: 9884.8. Samples: 417704454. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:53,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:54,624][626795] Updated weights for policy 0, policy_version 326032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:56,633][626795] Updated weights for policy 0, policy_version 326042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:23:58,581][626795] Updated weights for policy 0, policy_version 326052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:58,975][24592] Fps is (10 sec: 40140.7, 60 sec: 39185.0, 300 sec: 39516.0). Total num frames: 2671017984. Throughput: 0: 9895.6. Samples: 417734262. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:23:58,976][24592] Avg episode reward: [(0, '4.453')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:00,706][626795] Updated weights for policy 0, policy_version 326062 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:03,199][626795] Updated weights for policy 0, policy_version 326072 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:03,976][24592] Fps is (10 sec: 39319.5, 60 sec: 39594.4, 300 sec: 39515.9). Total num frames: 2671214592. Throughput: 0: 9781.7. Samples: 417790944. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:03,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:05,206][626795] Updated weights for policy 0, policy_version 326082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:07,233][626795] Updated weights for policy 0, policy_version 326092 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:08,976][24592] Fps is (10 sec: 36863.1, 60 sec: 39048.4, 300 sec: 39516.0). Total num frames: 2671386624. Throughput: 0: 9610.2. Samples: 417844758. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:08,977][24592] Avg episode reward: [(0, '4.357')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:09,974][626795] Updated weights for policy 0, policy_version 326102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:12,014][626795] Updated weights for policy 0, policy_version 326112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:13,920][626795] Updated weights for policy 0, policy_version 326122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:13,975][24592] Fps is (10 sec: 37685.5, 60 sec: 39049.2, 300 sec: 39488.2). Total num frames: 2671591424. Throughput: 0: 9626.7. Samples: 417875214. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:13,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:15,854][626795] Updated weights for policy 0, policy_version 326132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:17,943][626795] Updated weights for policy 0, policy_version 326142 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:18,976][24592] Fps is (10 sec: 40959.7, 60 sec: 39048.6, 300 sec: 39515.9). Total num frames: 2671796224. Throughput: 0: 9710.2. Samples: 417936678. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:18,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:19,959][626795] Updated weights for policy 0, policy_version 326152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:21,883][626795] Updated weights for policy 0, policy_version 326162 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:23,900][626795] Updated weights for policy 0, policy_version 326172 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:23,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39048.5, 300 sec: 39543.8). Total num frames: 2672001024. Throughput: 0: 9894.7. Samples: 417998532. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:23,977][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:25,911][626795] Updated weights for policy 0, policy_version 326182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:27,862][626795] Updated weights for policy 0, policy_version 326192 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:28,975][24592] Fps is (10 sec: 40961.6, 60 sec: 39321.6, 300 sec: 39627.1). Total num frames: 2672205824. Throughput: 0: 9902.0. Samples: 418029162. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:28,977][24592] Avg episode reward: [(0, '4.680')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:29,969][626795] Updated weights for policy 0, policy_version 326202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:31,903][626795] Updated weights for policy 0, policy_version 326212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:33,955][626795] Updated weights for policy 0, policy_version 326222 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:33,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39458.1, 300 sec: 39710.5). Total num frames: 2672410624. Throughput: 0: 9917.1. Samples: 418089648. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:33,977][24592] Avg episode reward: [(0, '4.940')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:35,980][626795] Updated weights for policy 0, policy_version 326232 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:37,905][626795] Updated weights for policy 0, policy_version 326242 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:38,976][24592] Fps is (10 sec: 40959.5, 60 sec: 40004.2, 300 sec: 39812.0). Total num frames: 2672615424. Throughput: 0: 9938.4. Samples: 418151682. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:38,977][24592] Avg episode reward: [(0, '5.019')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:40,054][626795] Updated weights for policy 0, policy_version 326252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:42,691][626795] Updated weights for policy 0, policy_version 326262 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:43,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39322.0, 300 sec: 39682.7). Total num frames: 2672787456. Throughput: 0: 9784.4. Samples: 418174560. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:43,976][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:44,734][626795] Updated weights for policy 0, policy_version 326272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:46,709][626795] Updated weights for policy 0, policy_version 326282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:48,706][626795] Updated weights for policy 0, policy_version 326292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:48,976][24592] Fps is (10 sec: 37682.4, 60 sec: 39594.5, 300 sec: 39710.4). Total num frames: 2672992256. Throughput: 0: 9899.3. Samples: 418236408. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:48,978][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:50,725][626795] Updated weights for policy 0, policy_version 326302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:52,590][626795] Updated weights for policy 0, policy_version 326312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:53,976][24592] Fps is (10 sec: 40959.3, 60 sec: 39594.6, 300 sec: 39710.4). Total num frames: 2673197056. Throughput: 0: 10063.4. Samples: 418297608. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:53,978][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:54,679][626795] Updated weights for policy 0, policy_version 326322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:56,773][626795] Updated weights for policy 0, policy_version 326332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:24:58,858][626795] Updated weights for policy 0, policy_version 326342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:58,975][24592] Fps is (10 sec: 40142.3, 60 sec: 39594.7, 300 sec: 39682.6). Total num frames: 2673393664. Throughput: 0: 10055.1. Samples: 418327692. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:24:58,977][24592] Avg episode reward: [(0, '5.013')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:00,795][626795] Updated weights for policy 0, policy_version 326352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:02,711][626795] Updated weights for policy 0, policy_version 326362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:03,975][24592] Fps is (10 sec: 40960.8, 60 sec: 39868.1, 300 sec: 39710.4). Total num frames: 2673606656. Throughput: 0: 10060.7. Samples: 418389408. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:03,976][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000326368_2673606656.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:04,155][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000325202_2664054784.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:04,833][626795] Updated weights for policy 0, policy_version 326372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:06,879][626795] Updated weights for policy 0, policy_version 326382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:08,766][626795] Updated weights for policy 0, policy_version 326392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:08,975][24592] Fps is (10 sec: 40959.5, 60 sec: 40277.5, 300 sec: 39682.6). Total num frames: 2673803264. Throughput: 0: 10047.4. Samples: 418450668. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:08,978][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:10,765][626795] Updated weights for policy 0, policy_version 326402 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:12,814][626795] Updated weights for policy 0, policy_version 326412 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:13,976][24592] Fps is (10 sec: 37682.0, 60 sec: 39867.5, 300 sec: 39710.4). Total num frames: 2673983488. Throughput: 0: 10032.5. Samples: 418480626. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:13,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:15,543][626795] Updated weights for policy 0, policy_version 326422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:17,467][626795] Updated weights for policy 0, policy_version 326432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:18,975][24592] Fps is (10 sec: 38502.4, 60 sec: 39867.9, 300 sec: 39710.4). Total num frames: 2674188288. Throughput: 0: 9898.4. Samples: 418535076. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:18,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:19,559][626795] Updated weights for policy 0, policy_version 326442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:21,450][626795] Updated weights for policy 0, policy_version 326452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:23,487][626795] Updated weights for policy 0, policy_version 326462 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:23,975][24592] Fps is (10 sec: 40961.3, 60 sec: 39867.7, 300 sec: 39682.6). Total num frames: 2674393088. Throughput: 0: 9893.8. Samples: 418596900. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:23,978][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:25,537][626795] Updated weights for policy 0, policy_version 326472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:27,576][626795] Updated weights for policy 0, policy_version 326482 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:28,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39867.7, 300 sec: 39710.4). Total num frames: 2674597888. Throughput: 0: 10054.3. Samples: 418627002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:28,978][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:29,344][626795] Updated weights for policy 0, policy_version 326492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:31,502][626795] Updated weights for policy 0, policy_version 326502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:33,525][626795] Updated weights for policy 0, policy_version 326512 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39867.8, 300 sec: 39710.4). Total num frames: 2674802688. Throughput: 0: 10037.0. Samples: 418688070. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:33,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:35,555][626795] Updated weights for policy 0, policy_version 326522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:37,520][626795] Updated weights for policy 0, policy_version 326532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39867.8, 300 sec: 39793.7). Total num frames: 2675007488. Throughput: 0: 10041.8. Samples: 418749486. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:38,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:39,591][626795] Updated weights for policy 0, policy_version 326542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:41,619][626795] Updated weights for policy 0, policy_version 326552 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:43,581][626795] Updated weights for policy 0, policy_version 326562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:43,976][24592] Fps is (10 sec: 40957.7, 60 sec: 40413.5, 300 sec: 39793.6). Total num frames: 2675212288. Throughput: 0: 10045.7. Samples: 418779756. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:43,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:45,623][626795] Updated weights for policy 0, policy_version 326572 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:48,211][626795] Updated weights for policy 0, policy_version 326582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:48,975][24592] Fps is (10 sec: 37683.1, 60 sec: 39867.9, 300 sec: 39793.7). Total num frames: 2675384320. Throughput: 0: 9882.5. Samples: 418834122. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:48,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:50,291][626795] Updated weights for policy 0, policy_version 326592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:52,390][626795] Updated weights for policy 0, policy_version 326602 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:53,976][24592] Fps is (10 sec: 37683.9, 60 sec: 39867.6, 300 sec: 39793.7). Total num frames: 2675589120. Throughput: 0: 9874.1. Samples: 418895004. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:53,978][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:54,368][626795] Updated weights for policy 0, policy_version 326612 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:56,492][626795] Updated weights for policy 0, policy_version 326622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:25:58,348][626795] Updated weights for policy 0, policy_version 326632 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:58,977][24592] Fps is (10 sec: 40136.5, 60 sec: 39866.9, 300 sec: 39765.8). Total num frames: 2675785728. Throughput: 0: 9875.7. Samples: 418925040. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:25:58,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:00,495][626795] Updated weights for policy 0, policy_version 326642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:02,444][626795] Updated weights for policy 0, policy_version 326652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:03,976][24592] Fps is (10 sec: 40138.5, 60 sec: 39730.6, 300 sec: 39793.5). Total num frames: 2675990528. Throughput: 0: 10019.3. Samples: 418985952. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:03,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:04,579][626795] Updated weights for policy 0, policy_version 326662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:06,398][626795] Updated weights for policy 0, policy_version 326672 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:08,499][626795] Updated weights for policy 0, policy_version 326682 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:08,975][24592] Fps is (10 sec: 40964.4, 60 sec: 39867.8, 300 sec: 39766.0). Total num frames: 2676195328. Throughput: 0: 10005.5. Samples: 419047146. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:08,977][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:10,600][626795] Updated weights for policy 0, policy_version 326692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:12,506][626795] Updated weights for policy 0, policy_version 326702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:13,976][24592] Fps is (10 sec: 40961.3, 60 sec: 40277.1, 300 sec: 39793.6). Total num frames: 2676400128. Throughput: 0: 10008.9. Samples: 419077410. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:13,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:14,608][626795] Updated weights for policy 0, policy_version 326712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:16,527][626795] Updated weights for policy 0, policy_version 326722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:18,435][626795] Updated weights for policy 0, policy_version 326732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40140.8, 300 sec: 39849.2). Total num frames: 2676596736. Throughput: 0: 10012.4. Samples: 419138628. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:18,977][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:21,260][626795] Updated weights for policy 0, policy_version 326742 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:23,221][626795] Updated weights for policy 0, policy_version 326752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:23,977][24592] Fps is (10 sec: 37678.4, 60 sec: 39729.9, 300 sec: 39766.1). Total num frames: 2676776960. Throughput: 0: 9850.3. Samples: 419192766. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:23,980][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:25,270][626795] Updated weights for policy 0, policy_version 326762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:27,279][626795] Updated weights for policy 0, policy_version 326772 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:28,975][24592] Fps is (10 sec: 38502.6, 60 sec: 39731.3, 300 sec: 39765.9). Total num frames: 2676981760. Throughput: 0: 9854.5. Samples: 419223204. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:28,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:29,262][626795] Updated weights for policy 0, policy_version 326782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:31,280][626795] Updated weights for policy 0, policy_version 326792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:33,443][626795] Updated weights for policy 0, policy_version 326802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:33,975][24592] Fps is (10 sec: 39329.2, 60 sec: 39458.1, 300 sec: 39710.4). Total num frames: 2677170176. Throughput: 0: 9982.7. Samples: 419283342. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:33,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:36,035][626795] Updated weights for policy 0, policy_version 326812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:38,399][626795] Updated weights for policy 0, policy_version 326822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:38,976][24592] Fps is (10 sec: 36860.2, 60 sec: 39047.9, 300 sec: 39626.9). Total num frames: 2677350400. Throughput: 0: 9784.9. Samples: 419335332. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:38,979][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:40,331][626795] Updated weights for policy 0, policy_version 326832 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:42,408][626795] Updated weights for policy 0, policy_version 326842 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:43,975][24592] Fps is (10 sec: 37683.0, 60 sec: 38912.3, 300 sec: 39572.0). Total num frames: 2677547008. Throughput: 0: 9784.9. Samples: 419365350. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:43,977][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:44,409][626795] Updated weights for policy 0, policy_version 326852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:46,484][626795] Updated weights for policy 0, policy_version 326862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:48,417][626795] Updated weights for policy 0, policy_version 326872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:48,975][24592] Fps is (10 sec: 40144.7, 60 sec: 39458.2, 300 sec: 39599.3). Total num frames: 2677751808. Throughput: 0: 9778.7. Samples: 419425986. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:48,977][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:50,455][626795] Updated weights for policy 0, policy_version 326882 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:53,163][626795] Updated weights for policy 0, policy_version 326892 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:53,975][24592] Fps is (10 sec: 38502.5, 60 sec: 39048.8, 300 sec: 39627.1). Total num frames: 2677932032. Throughput: 0: 9625.1. Samples: 419480274. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:53,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:55,269][626795] Updated weights for policy 0, policy_version 326902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:57,245][626795] Updated weights for policy 0, policy_version 326912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:58,975][24592] Fps is (10 sec: 37682.8, 60 sec: 39049.2, 300 sec: 39571.5). Total num frames: 2678128640. Throughput: 0: 9611.3. Samples: 419509914. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:26:58,976][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:26:59,324][626795] Updated weights for policy 0, policy_version 326922 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:01,411][626795] Updated weights for policy 0, policy_version 326932 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:03,812][626795] Updated weights for policy 0, policy_version 326942 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:03,975][24592] Fps is (10 sec: 37683.0, 60 sec: 38639.5, 300 sec: 39488.2). Total num frames: 2678308864. Throughput: 0: 9555.9. Samples: 419568642. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:03,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000326942_2678308864.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:04,176][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000325786_2668838912.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:06,198][626795] Updated weights for policy 0, policy_version 326952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:08,406][626795] Updated weights for policy 0, policy_version 326962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:08,975][24592] Fps is (10 sec: 36864.5, 60 sec: 38365.9, 300 sec: 39460.5). Total num frames: 2678497280. Throughput: 0: 9545.5. Samples: 419622294. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:08,980][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:10,492][626795] Updated weights for policy 0, policy_version 326972 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:12,798][626795] Updated weights for policy 0, policy_version 326982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:13,975][24592] Fps is (10 sec: 36044.6, 60 sec: 37820.1, 300 sec: 39321.6). Total num frames: 2678669312. Throughput: 0: 9478.4. Samples: 419649732. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:13,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:15,155][626795] Updated weights for policy 0, policy_version 326992 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:17,372][626795] Updated weights for policy 0, policy_version 327002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:18,980][24592] Fps is (10 sec: 35210.2, 60 sec: 37544.0, 300 sec: 39265.5). Total num frames: 2678849536. Throughput: 0: 9344.6. Samples: 419703888. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:18,981][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:19,617][626795] Updated weights for policy 0, policy_version 327012 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:21,549][626795] Updated weights for policy 0, policy_version 327022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:23,626][626795] Updated weights for policy 0, policy_version 327032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:23,975][24592] Fps is (10 sec: 38502.5, 60 sec: 37957.4, 300 sec: 39347.7). Total num frames: 2679054336. Throughput: 0: 9487.8. Samples: 419762274. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:23,978][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:26,343][626795] Updated weights for policy 0, policy_version 327042 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:28,418][626795] Updated weights for policy 0, policy_version 327052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:28,975][24592] Fps is (10 sec: 37699.6, 60 sec: 37410.1, 300 sec: 39238.3). Total num frames: 2679226368. Throughput: 0: 9331.9. Samples: 419785284. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:28,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:30,467][626795] Updated weights for policy 0, policy_version 327062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:32,505][626795] Updated weights for policy 0, policy_version 327072 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:33,975][24592] Fps is (10 sec: 37683.6, 60 sec: 37683.2, 300 sec: 39238.4). Total num frames: 2679431168. Throughput: 0: 9324.1. Samples: 419845572. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:33,976][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:34,563][626795] Updated weights for policy 0, policy_version 327082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:36,475][626795] Updated weights for policy 0, policy_version 327092 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:38,529][626795] Updated weights for policy 0, policy_version 327102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 38093.5, 300 sec: 39266.1). Total num frames: 2679635968. Throughput: 0: 9488.1. Samples: 419907240. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:38,976][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:40,694][626795] Updated weights for policy 0, policy_version 327112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:42,662][626795] Updated weights for policy 0, policy_version 327122 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:43,977][24592] Fps is (10 sec: 40954.3, 60 sec: 38228.5, 300 sec: 39265.9). Total num frames: 2679840768. Throughput: 0: 9478.8. Samples: 419936472. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:43,977][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:44,384][626772] Signal inference workers to stop experience collection... (5450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:44,388][626772] Signal inference workers to resume experience collection... (5450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:44,400][626795] InferenceWorker_p0-w0: stopping experience collection (5450 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:44,401][626795] InferenceWorker_p0-w0: resuming experience collection (5450 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:44,720][626795] Updated weights for policy 0, policy_version 327132 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:46,683][626795] Updated weights for policy 0, policy_version 327142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:48,758][626795] Updated weights for policy 0, policy_version 327152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:48,977][24592] Fps is (10 sec: 40134.4, 60 sec: 38091.8, 300 sec: 39238.1). Total num frames: 2680037376. Throughput: 0: 9513.1. Samples: 419996748. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:48,978][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:50,861][626795] Updated weights for policy 0, policy_version 327162 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:52,762][626795] Updated weights for policy 0, policy_version 327172 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:53,975][24592] Fps is (10 sec: 39326.6, 60 sec: 38365.8, 300 sec: 39210.5). Total num frames: 2680233984. Throughput: 0: 9667.7. Samples: 420057342. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:53,977][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:54,927][626795] Updated weights for policy 0, policy_version 327182 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:56,873][626795] Updated weights for policy 0, policy_version 327192 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:58,975][24592] Fps is (10 sec: 37689.1, 60 sec: 38092.9, 300 sec: 39238.3). Total num frames: 2680414208. Throughput: 0: 9738.7. Samples: 420087972. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:27:58,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:27:59,615][626795] Updated weights for policy 0, policy_version 327202 (0.0705)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:01,613][626795] Updated weights for policy 0, policy_version 327212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:03,616][626795] Updated weights for policy 0, policy_version 327222 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:03,976][24592] Fps is (10 sec: 38500.6, 60 sec: 38502.1, 300 sec: 39238.2). Total num frames: 2680619008. Throughput: 0: 9733.5. Samples: 420141858. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:03,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:05,614][626795] Updated weights for policy 0, policy_version 327232 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:07,646][626795] Updated weights for policy 0, policy_version 327242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:08,976][24592] Fps is (10 sec: 40140.3, 60 sec: 38638.8, 300 sec: 39210.6). Total num frames: 2680815616. Throughput: 0: 9780.0. Samples: 420202374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:08,978][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:09,658][626795] Updated weights for policy 0, policy_version 327252 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:11,785][626795] Updated weights for policy 0, policy_version 327262 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:13,683][626795] Updated weights for policy 0, policy_version 327272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:13,975][24592] Fps is (10 sec: 40143.1, 60 sec: 39185.1, 300 sec: 39210.6). Total num frames: 2681020416. Throughput: 0: 9942.8. Samples: 420232710. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:13,976][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:15,738][626795] Updated weights for policy 0, policy_version 327282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:17,624][626795] Updated weights for policy 0, policy_version 327292 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:18,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39597.5, 300 sec: 39210.5). Total num frames: 2681225216. Throughput: 0: 9981.2. Samples: 420294726. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:18,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:19,744][626795] Updated weights for policy 0, policy_version 327302 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:21,737][626795] Updated weights for policy 0, policy_version 327312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:23,649][626795] Updated weights for policy 0, policy_version 327322 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:23,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39594.7, 300 sec: 39266.1). Total num frames: 2681430016. Throughput: 0: 9974.3. Samples: 420356082. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:23,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:25,757][626795] Updated weights for policy 0, policy_version 327332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:27,724][626795] Updated weights for policy 0, policy_version 327342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:28,976][24592] Fps is (10 sec: 40957.1, 60 sec: 40140.3, 300 sec: 39293.7). Total num frames: 2681634816. Throughput: 0: 9992.8. Samples: 420386142. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:28,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:29,631][626795] Updated weights for policy 0, policy_version 327352 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:32,414][626795] Updated weights for policy 0, policy_version 327362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:33,975][24592] Fps is (10 sec: 38502.1, 60 sec: 39731.1, 300 sec: 39321.6). Total num frames: 2681815040. Throughput: 0: 9851.8. Samples: 420440064. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:33,977][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:34,480][626795] Updated weights for policy 0, policy_version 327372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:36,433][626795] Updated weights for policy 0, policy_version 327382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:38,373][626795] Updated weights for policy 0, policy_version 327392 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:38,975][24592] Fps is (10 sec: 37686.4, 60 sec: 39594.7, 300 sec: 39266.1). Total num frames: 2682011648. Throughput: 0: 9879.5. Samples: 420501918. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:38,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:40,485][626795] Updated weights for policy 0, policy_version 327402 (0.0031)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:42,616][626795] Updated weights for policy 0, policy_version 327412 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:43,976][24592] Fps is (10 sec: 39318.5, 60 sec: 39458.4, 300 sec: 39293.7). Total num frames: 2682208256. Throughput: 0: 9871.5. Samples: 420532200. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:43,980][24592] Avg episode reward: [(0, '4.936')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:44,923][626795] Updated weights for policy 0, policy_version 327422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:47,196][626795] Updated weights for policy 0, policy_version 327432 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:48,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39186.1, 300 sec: 39210.5). Total num frames: 2682388480. Throughput: 0: 9859.7. Samples: 420585540. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:48,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:49,556][626795] Updated weights for policy 0, policy_version 327442 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:51,683][626795] Updated weights for policy 0, policy_version 327452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:53,975][24592] Fps is (10 sec: 35228.9, 60 sec: 38775.6, 300 sec: 39127.2). Total num frames: 2682560512. Throughput: 0: 9734.3. Samples: 420640416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:53,978][24592] Avg episode reward: [(0, '4.435')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:54,011][626795] Updated weights for policy 0, policy_version 327462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:56,218][626795] Updated weights for policy 0, policy_version 327472 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:28:58,437][626795] Updated weights for policy 0, policy_version 327482 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:58,975][24592] Fps is (10 sec: 36044.5, 60 sec: 38912.0, 300 sec: 39099.5). Total num frames: 2682748928. Throughput: 0: 9647.2. Samples: 420666834. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:28:58,979][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:00,847][626795] Updated weights for policy 0, policy_version 327492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:03,350][626795] Updated weights for policy 0, policy_version 327502 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:04,516][24592] Fps is (10 sec: 33418.7, 60 sec: 37888.3, 300 sec: 39000.2). Total num frames: 2682912768. Throughput: 0: 9304.2. Samples: 420718446. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:04,518][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:04,551][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000327505_2682920960.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:04,784][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000326368_2673606656.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:06,064][626795] Updated weights for policy 0, policy_version 327512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:08,260][626795] Updated weights for policy 0, policy_version 327522 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:08,975][24592] Fps is (10 sec: 33587.3, 60 sec: 37819.8, 300 sec: 38960.6). Total num frames: 2683084800. Throughput: 0: 9237.1. Samples: 420771750. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:08,976][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:10,253][626795] Updated weights for policy 0, policy_version 327532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:12,585][626795] Updated weights for policy 0, policy_version 327542 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:13,975][24592] Fps is (10 sec: 38970.9, 60 sec: 37683.2, 300 sec: 38932.9). Total num frames: 2683281408. Throughput: 0: 9157.1. Samples: 420798204. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:13,979][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:14,630][626795] Updated weights for policy 0, policy_version 327552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:16,899][626795] Updated weights for policy 0, policy_version 327562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:18,896][626795] Updated weights for policy 0, policy_version 327572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:18,975][24592] Fps is (10 sec: 38502.6, 60 sec: 37410.2, 300 sec: 38877.3). Total num frames: 2683469824. Throughput: 0: 9219.8. Samples: 420854952. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:18,976][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:21,065][626795] Updated weights for policy 0, policy_version 327582 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:23,063][626795] Updated weights for policy 0, policy_version 327592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:23,975][24592] Fps is (10 sec: 38502.4, 60 sec: 37273.6, 300 sec: 38849.5). Total num frames: 2683666432. Throughput: 0: 9184.0. Samples: 420915198. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:23,977][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:25,097][626795] Updated weights for policy 0, policy_version 327602 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:27,064][626795] Updated weights for policy 0, policy_version 327612 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:28,975][24592] Fps is (10 sec: 40140.7, 60 sec: 37274.1, 300 sec: 38849.5). Total num frames: 2683871232. Throughput: 0: 9188.7. Samples: 420945684. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:28,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:29,231][626795] Updated weights for policy 0, policy_version 327622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:31,145][626795] Updated weights for policy 0, policy_version 327632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:33,151][626795] Updated weights for policy 0, policy_version 327642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:33,976][24592] Fps is (10 sec: 40136.9, 60 sec: 37546.1, 300 sec: 38821.6). Total num frames: 2684067840. Throughput: 0: 9352.3. Samples: 421006404. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:33,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:35,187][626795] Updated weights for policy 0, policy_version 327652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:37,966][626795] Updated weights for policy 0, policy_version 327662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:38,975][24592] Fps is (10 sec: 37683.1, 60 sec: 37273.6, 300 sec: 38849.5). Total num frames: 2684248064. Throughput: 0: 9294.1. Samples: 421058652. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:38,978][24592] Avg episode reward: [(0, '4.545')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:40,152][626795] Updated weights for policy 0, policy_version 327672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:42,168][626795] Updated weights for policy 0, policy_version 327682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:43,977][24592] Fps is (10 sec: 37683.3, 60 sec: 37273.6, 300 sec: 38821.7). Total num frames: 2684444672. Throughput: 0: 9372.3. Samples: 421088598. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:43,980][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:44,198][626795] Updated weights for policy 0, policy_version 327692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:46,219][626795] Updated weights for policy 0, policy_version 327702 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:48,268][626795] Updated weights for policy 0, policy_version 327712 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:48,975][24592] Fps is (10 sec: 39321.8, 60 sec: 37546.7, 300 sec: 38794.0). Total num frames: 2684641280. Throughput: 0: 9694.2. Samples: 421149444. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:48,977][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:50,230][626795] Updated weights for policy 0, policy_version 327722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:52,295][626795] Updated weights for policy 0, policy_version 327732 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:53,975][24592] Fps is (10 sec: 40144.4, 60 sec: 38092.8, 300 sec: 38821.7). Total num frames: 2684846080. Throughput: 0: 9739.7. Samples: 421210038. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:53,978][24592] Avg episode reward: [(0, '4.886')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:54,346][626795] Updated weights for policy 0, policy_version 327742 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:56,354][626795] Updated weights for policy 0, policy_version 327752 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:29:58,329][626795] Updated weights for policy 0, policy_version 327762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:58,976][24592] Fps is (10 sec: 40139.2, 60 sec: 38229.1, 300 sec: 38766.2). Total num frames: 2685042688. Throughput: 0: 9814.7. Samples: 421239870. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:29:58,978][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:00,374][626795] Updated weights for policy 0, policy_version 327772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:02,317][626795] Updated weights for policy 0, policy_version 327782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:03,976][24592] Fps is (10 sec: 40138.2, 60 sec: 39265.4, 300 sec: 38793.9). Total num frames: 2685247488. Throughput: 0: 9911.8. Samples: 421300992. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:03,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:04,402][626795] Updated weights for policy 0, policy_version 327792 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:06,463][626795] Updated weights for policy 0, policy_version 327802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:08,461][626795] Updated weights for policy 0, policy_version 327812 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:08,976][24592] Fps is (10 sec: 40960.6, 60 sec: 39458.0, 300 sec: 38877.3). Total num frames: 2685452288. Throughput: 0: 9926.8. Samples: 421361904. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:08,976][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:11,170][626795] Updated weights for policy 0, policy_version 327822 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:13,284][626795] Updated weights for policy 0, policy_version 327832 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:13,976][24592] Fps is (10 sec: 37682.8, 60 sec: 39048.0, 300 sec: 38766.1). Total num frames: 2685624320. Throughput: 0: 9753.4. Samples: 421384596. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:13,978][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:15,247][626795] Updated weights for policy 0, policy_version 327842 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:17,259][626795] Updated weights for policy 0, policy_version 327852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:18,976][24592] Fps is (10 sec: 37681.8, 60 sec: 39321.2, 300 sec: 38766.1). Total num frames: 2685829120. Throughput: 0: 9746.0. Samples: 421444968. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:18,977][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:19,311][626795] Updated weights for policy 0, policy_version 327862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:21,409][626795] Updated weights for policy 0, policy_version 327872 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:23,403][626795] Updated weights for policy 0, policy_version 327882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:23,976][24592] Fps is (10 sec: 40143.0, 60 sec: 39321.4, 300 sec: 38738.4). Total num frames: 2686025728. Throughput: 0: 9931.3. Samples: 421505562. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:23,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:25,522][626795] Updated weights for policy 0, policy_version 327892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:27,456][626795] Updated weights for policy 0, policy_version 327902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:28,975][24592] Fps is (10 sec: 39323.8, 60 sec: 39185.0, 300 sec: 38710.7). Total num frames: 2686222336. Throughput: 0: 9932.9. Samples: 421535568. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:28,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:29,801][626795] Updated weights for policy 0, policy_version 327912 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:32,049][626795] Updated weights for policy 0, policy_version 327922 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:33,976][24592] Fps is (10 sec: 37683.5, 60 sec: 38912.5, 300 sec: 38627.3). Total num frames: 2686402560. Throughput: 0: 9787.0. Samples: 421589862. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:33,980][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:34,525][626795] Updated weights for policy 0, policy_version 327932 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:36,696][626795] Updated weights for policy 0, policy_version 327942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:38,859][626795] Updated weights for policy 0, policy_version 327952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:38,975][24592] Fps is (10 sec: 36044.8, 60 sec: 38912.0, 300 sec: 38544.1). Total num frames: 2686582784. Throughput: 0: 9647.3. Samples: 421644168. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:38,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:41,222][626795] Updated weights for policy 0, policy_version 327962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:43,942][626795] Updated weights for policy 0, policy_version 327972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:43,978][24592] Fps is (10 sec: 34402.7, 60 sec: 38365.7, 300 sec: 38516.1). Total num frames: 2686746624. Throughput: 0: 9579.1. Samples: 421670940. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:43,979][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:45,931][626795] Updated weights for policy 0, policy_version 327982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:48,051][626795] Updated weights for policy 0, policy_version 327992 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:48,975][24592] Fps is (10 sec: 36044.7, 60 sec: 38365.8, 300 sec: 38488.6). Total num frames: 2686943232. Throughput: 0: 9410.0. Samples: 421724436. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:48,976][24592] Avg episode reward: [(0, '4.486')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:50,069][626795] Updated weights for policy 0, policy_version 328002 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:52,239][626795] Updated weights for policy 0, policy_version 328012 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:53,976][24592] Fps is (10 sec: 39325.2, 60 sec: 38229.1, 300 sec: 38488.6). Total num frames: 2687139840. Throughput: 0: 9376.9. Samples: 421783866. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:53,977][24592] Avg episode reward: [(0, '4.890')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:54,375][626795] Updated weights for policy 0, policy_version 328022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:56,181][626795] Updated weights for policy 0, policy_version 328032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:30:58,251][626795] Updated weights for policy 0, policy_version 328042 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:58,975][24592] Fps is (10 sec: 40141.3, 60 sec: 38366.1, 300 sec: 38488.6). Total num frames: 2687344640. Throughput: 0: 9553.5. Samples: 421814496. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:30:58,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:00,224][626795] Updated weights for policy 0, policy_version 328052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:02,297][626795] Updated weights for policy 0, policy_version 328062 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:03,976][24592] Fps is (10 sec: 40960.5, 60 sec: 38366.2, 300 sec: 38488.5). Total num frames: 2687549440. Throughput: 0: 9558.6. Samples: 421875102. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:03,978][24592] Avg episode reward: [(0, '4.578')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:03,986][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000328070_2687549440.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:04,169][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000326942_2678308864.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:04,405][626795] Updated weights for policy 0, policy_version 328072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:06,450][626795] Updated weights for policy 0, policy_version 328082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:08,482][626795] Updated weights for policy 0, policy_version 328092 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:08,975][24592] Fps is (10 sec: 40140.6, 60 sec: 38229.5, 300 sec: 38460.8). Total num frames: 2687746048. Throughput: 0: 9541.9. Samples: 421934946. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:08,978][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:10,599][626795] Updated weights for policy 0, policy_version 328102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:12,639][626795] Updated weights for policy 0, policy_version 328112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:13,975][24592] Fps is (10 sec: 39322.1, 60 sec: 38639.4, 300 sec: 38460.7). Total num frames: 2687942656. Throughput: 0: 9524.3. Samples: 421964160. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:13,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:14,777][626795] Updated weights for policy 0, policy_version 328122 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:17,339][626795] Updated weights for policy 0, policy_version 328132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:18,976][24592] Fps is (10 sec: 36862.6, 60 sec: 38092.9, 300 sec: 38433.2). Total num frames: 2688114688. Throughput: 0: 9516.8. Samples: 422018118. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:18,977][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:19,383][626795] Updated weights for policy 0, policy_version 328142 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:21,490][626795] Updated weights for policy 0, policy_version 328152 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:23,477][626795] Updated weights for policy 0, policy_version 328162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:23,976][24592] Fps is (10 sec: 37682.1, 60 sec: 38229.3, 300 sec: 38432.9). Total num frames: 2688319488. Throughput: 0: 9649.9. Samples: 422078418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:23,978][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:25,490][626795] Updated weights for policy 0, policy_version 328172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:27,416][626795] Updated weights for policy 0, policy_version 328182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:28,976][24592] Fps is (10 sec: 40958.8, 60 sec: 38365.4, 300 sec: 38488.4). Total num frames: 2688524288. Throughput: 0: 9724.9. Samples: 422108556. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:28,978][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:29,621][626795] Updated weights for policy 0, policy_version 328192 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:31,460][626795] Updated weights for policy 0, policy_version 328202 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:33,593][626795] Updated weights for policy 0, policy_version 328212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:33,976][24592] Fps is (10 sec: 40959.4, 60 sec: 38775.2, 300 sec: 38571.9). Total num frames: 2688729088. Throughput: 0: 9903.8. Samples: 422170110. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:33,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:35,521][626795] Updated weights for policy 0, policy_version 328222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:37,568][626795] Updated weights for policy 0, policy_version 328232 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:38,975][24592] Fps is (10 sec: 40962.8, 60 sec: 39185.1, 300 sec: 38599.6). Total num frames: 2688933888. Throughput: 0: 9934.9. Samples: 422230932. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:38,978][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:39,650][626795] Updated weights for policy 0, policy_version 328242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:41,695][626795] Updated weights for policy 0, policy_version 328252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:43,550][626795] Updated weights for policy 0, policy_version 328262 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:43,976][24592] Fps is (10 sec: 40141.5, 60 sec: 39731.8, 300 sec: 38571.8). Total num frames: 2689130496. Throughput: 0: 9932.4. Samples: 422261460. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:43,978][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:45,566][626795] Updated weights for policy 0, policy_version 328272 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:47,592][626795] Updated weights for policy 0, policy_version 328282 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:49,169][24592] Fps is (10 sec: 37770.1, 60 sec: 39467.2, 300 sec: 38574.2). Total num frames: 2689318912. Throughput: 0: 9891.0. Samples: 422322114. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:49,171][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:50,387][626795] Updated weights for policy 0, policy_version 328292 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:52,425][626795] Updated weights for policy 0, policy_version 328302 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:53,975][24592] Fps is (10 sec: 37684.2, 60 sec: 39458.3, 300 sec: 38571.8). Total num frames: 2689507328. Throughput: 0: 9800.1. Samples: 422375952. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:53,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:54,420][626795] Updated weights for policy 0, policy_version 328312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:56,484][626795] Updated weights for policy 0, policy_version 328322 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:31:58,396][626795] Updated weights for policy 0, policy_version 328332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:58,975][24592] Fps is (10 sec: 40098.9, 60 sec: 39458.1, 300 sec: 38655.1). Total num frames: 2689712128. Throughput: 0: 9820.5. Samples: 422406084. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:31:58,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:00,521][626795] Updated weights for policy 0, policy_version 328342 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:02,391][626795] Updated weights for policy 0, policy_version 328352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 39594.8, 300 sec: 38738.4). Total num frames: 2689925120. Throughput: 0: 9992.7. Samples: 422467788. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:03,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:04,449][626795] Updated weights for policy 0, policy_version 328362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:06,538][626795] Updated weights for policy 0, policy_version 328372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:08,544][626795] Updated weights for policy 0, policy_version 328382 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 39594.6, 300 sec: 38821.8). Total num frames: 2690121728. Throughput: 0: 10000.1. Samples: 422528418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:08,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:10,588][626795] Updated weights for policy 0, policy_version 328392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:12,543][626795] Updated weights for policy 0, policy_version 328402 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:13,975][24592] Fps is (10 sec: 39321.7, 60 sec: 39594.7, 300 sec: 38877.9). Total num frames: 2690318336. Throughput: 0: 9982.9. Samples: 422557782. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:13,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:14,650][626795] Updated weights for policy 0, policy_version 328412 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:16,724][626795] Updated weights for policy 0, policy_version 328422 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:18,790][626795] Updated weights for policy 0, policy_version 328432 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 40141.0, 300 sec: 38877.3). Total num frames: 2690523136. Throughput: 0: 9962.5. Samples: 422618418. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:18,976][24592] Avg episode reward: [(0, '4.436')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:20,725][626795] Updated weights for policy 0, policy_version 328442 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:23,497][626795] Updated weights for policy 0, policy_version 328452 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:23,975][24592] Fps is (10 sec: 37683.4, 60 sec: 39594.9, 300 sec: 38877.3). Total num frames: 2690695168. Throughput: 0: 9811.2. Samples: 422672436. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:23,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:25,610][626795] Updated weights for policy 0, policy_version 328462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:27,667][626795] Updated weights for policy 0, policy_version 328472 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:28,976][24592] Fps is (10 sec: 36862.9, 60 sec: 39458.4, 300 sec: 38849.5). Total num frames: 2690891776. Throughput: 0: 9772.9. Samples: 422701242. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:28,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:29,792][626795] Updated weights for policy 0, policy_version 328482 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:31,661][626795] Updated weights for policy 0, policy_version 328492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:33,608][626795] Updated weights for policy 0, policy_version 328502 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:33,975][24592] Fps is (10 sec: 40140.4, 60 sec: 39458.4, 300 sec: 38849.5). Total num frames: 2691096576. Throughput: 0: 9827.9. Samples: 422762466. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:33,976][24592] Avg episode reward: [(0, '4.810')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:35,728][626795] Updated weights for policy 0, policy_version 328512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:37,728][626795] Updated weights for policy 0, policy_version 328522 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:38,979][24592] Fps is (10 sec: 40947.5, 60 sec: 39455.9, 300 sec: 38849.3). Total num frames: 2691301376. Throughput: 0: 9944.3. Samples: 422823480. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:38,981][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:39,884][626795] Updated weights for policy 0, policy_version 328532 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:41,787][626795] Updated weights for policy 0, policy_version 328542 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:43,991][24592] Fps is (10 sec: 39258.7, 60 sec: 39311.3, 300 sec: 38819.8). Total num frames: 2691489792. Throughput: 0: 9931.5. Samples: 422853162. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:43,992][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:44,130][626795] Updated weights for policy 0, policy_version 328552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:46,399][626795] Updated weights for policy 0, policy_version 328562 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:48,605][626795] Updated weights for policy 0, policy_version 328572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:48,975][24592] Fps is (10 sec: 36876.3, 60 sec: 39312.1, 300 sec: 38766.2). Total num frames: 2691670016. Throughput: 0: 9758.7. Samples: 422906928. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:48,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:50,985][626795] Updated weights for policy 0, policy_version 328582 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:53,289][626795] Updated weights for policy 0, policy_version 328592 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:53,977][24592] Fps is (10 sec: 36099.0, 60 sec: 39047.9, 300 sec: 38766.1). Total num frames: 2691850240. Throughput: 0: 9604.3. Samples: 422960622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:53,978][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:56,366][626795] Updated weights for policy 0, policy_version 328602 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:32:58,611][626795] Updated weights for policy 0, policy_version 328612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:58,975][24592] Fps is (10 sec: 32767.7, 60 sec: 38092.8, 300 sec: 38571.9). Total num frames: 2691997696. Throughput: 0: 9393.6. Samples: 422980494. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:32:58,978][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:00,702][626795] Updated weights for policy 0, policy_version 328622 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:02,748][626795] Updated weights for policy 0, policy_version 328632 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:03,975][24592] Fps is (10 sec: 35229.5, 60 sec: 37956.3, 300 sec: 38599.6). Total num frames: 2692202496. Throughput: 0: 9323.6. Samples: 423037980. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:03,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000328638_2692202496.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:04,130][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000327505_2682920960.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:04,850][626795] Updated weights for policy 0, policy_version 328642 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:06,893][626795] Updated weights for policy 0, policy_version 328652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:08,926][626795] Updated weights for policy 0, policy_version 328662 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:08,975][24592] Fps is (10 sec: 40141.0, 60 sec: 37956.2, 300 sec: 38571.8). Total num frames: 2692399104. Throughput: 0: 9433.0. Samples: 423096924. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:08,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:11,112][626795] Updated weights for policy 0, policy_version 328672 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:13,183][626795] Updated weights for policy 0, policy_version 328682 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:13,976][24592] Fps is (10 sec: 38500.7, 60 sec: 37819.5, 300 sec: 38516.2). Total num frames: 2692587520. Throughput: 0: 9428.9. Samples: 423125544. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:13,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:15,216][626795] Updated weights for policy 0, policy_version 328692 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:17,231][626795] Updated weights for policy 0, policy_version 328702 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:18,975][24592] Fps is (10 sec: 40141.2, 60 sec: 37956.3, 300 sec: 38544.1). Total num frames: 2692800512. Throughput: 0: 9431.8. Samples: 423186894. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:18,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:19,235][626795] Updated weights for policy 0, policy_version 328712 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:21,154][626795] Updated weights for policy 0, policy_version 328722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:23,031][626795] Updated weights for policy 0, policy_version 328732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:23,976][24592] Fps is (10 sec: 41779.8, 60 sec: 38502.2, 300 sec: 38544.1). Total num frames: 2693005312. Throughput: 0: 9471.8. Samples: 423249684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:23,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:24,974][626795] Updated weights for policy 0, policy_version 328742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:27,096][626795] Updated weights for policy 0, policy_version 328752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:28,975][24592] Fps is (10 sec: 37682.7, 60 sec: 38092.9, 300 sec: 38516.3). Total num frames: 2693177344. Throughput: 0: 9490.7. Samples: 423280092. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:28,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:29,892][626795] Updated weights for policy 0, policy_version 328762 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:31,920][626795] Updated weights for policy 0, policy_version 328772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:33,847][626795] Updated weights for policy 0, policy_version 328782 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:33,975][24592] Fps is (10 sec: 37684.0, 60 sec: 38092.8, 300 sec: 38544.0). Total num frames: 2693382144. Throughput: 0: 9477.3. Samples: 423333408. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:33,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:35,775][626795] Updated weights for policy 0, policy_version 328792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:37,759][626795] Updated weights for policy 0, policy_version 328802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:38,977][24592] Fps is (10 sec: 41773.0, 60 sec: 38230.5, 300 sec: 38599.5). Total num frames: 2693595136. Throughput: 0: 9690.2. Samples: 423396684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:38,978][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:39,749][626795] Updated weights for policy 0, policy_version 328812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:41,635][626795] Updated weights for policy 0, policy_version 328822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:43,601][626795] Updated weights for policy 0, policy_version 328832 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:43,976][24592] Fps is (10 sec: 41778.0, 60 sec: 38512.5, 300 sec: 38682.8). Total num frames: 2693799936. Throughput: 0: 9952.7. Samples: 423428370. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:43,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:45,592][626795] Updated weights for policy 0, policy_version 328842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:47,505][626795] Updated weights for policy 0, policy_version 328852 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:48,976][24592] Fps is (10 sec: 41785.1, 60 sec: 39048.4, 300 sec: 38821.7). Total num frames: 2694012928. Throughput: 0: 10062.9. Samples: 423490812. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:48,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:49,466][626795] Updated weights for policy 0, policy_version 328862 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:51,475][626795] Updated weights for policy 0, policy_version 328872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:51,698][626772] Signal inference workers to stop experience collection... (5500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:51,699][626772] Signal inference workers to resume experience collection... (5500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:51,707][626795] InferenceWorker_p0-w0: stopping experience collection (5500 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:51,712][626795] InferenceWorker_p0-w0: resuming experience collection (5500 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:53,399][626795] Updated weights for policy 0, policy_version 328882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:53,976][24592] Fps is (10 sec: 41779.6, 60 sec: 39458.7, 300 sec: 38877.3). Total num frames: 2694217728. Throughput: 0: 10136.0. Samples: 423553044. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:53,978][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:55,538][626795] Updated weights for policy 0, policy_version 328892 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:57,480][626795] Updated weights for policy 0, policy_version 328902 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:58,976][24592] Fps is (10 sec: 40960.1, 60 sec: 40413.8, 300 sec: 39087.8). Total num frames: 2694422528. Throughput: 0: 10180.3. Samples: 423583656. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:33:58,976][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:33:59,455][626795] Updated weights for policy 0, policy_version 328912 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:02,206][626795] Updated weights for policy 0, policy_version 328922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:03,975][24592] Fps is (10 sec: 36864.8, 60 sec: 39731.1, 300 sec: 38988.4). Total num frames: 2694586368. Throughput: 0: 10011.4. Samples: 423637410. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:03,977][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:04,456][626795] Updated weights for policy 0, policy_version 328932 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:06,313][626795] Updated weights for policy 0, policy_version 328942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:08,278][626795] Updated weights for policy 0, policy_version 328952 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:08,975][24592] Fps is (10 sec: 37683.8, 60 sec: 40004.3, 300 sec: 39043.9). Total num frames: 2694799360. Throughput: 0: 9961.7. Samples: 423697956. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:08,990][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:10,268][626795] Updated weights for policy 0, policy_version 328962 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:12,183][626795] Updated weights for policy 0, policy_version 328972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:13,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40277.6, 300 sec: 39099.4). Total num frames: 2695004160. Throughput: 0: 9984.8. Samples: 423729408. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:13,978][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:14,222][626795] Updated weights for policy 0, policy_version 328982 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:16,187][626795] Updated weights for policy 0, policy_version 328992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:18,166][626795] Updated weights for policy 0, policy_version 329002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:18,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40277.3, 300 sec: 39155.0). Total num frames: 2695217152. Throughput: 0: 10184.8. Samples: 423791724. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:18,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:20,030][626795] Updated weights for policy 0, policy_version 329012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:22,010][626795] Updated weights for policy 0, policy_version 329022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:23,973][626795] Updated weights for policy 0, policy_version 329032 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:23,976][24592] Fps is (10 sec: 42597.9, 60 sec: 40414.0, 300 sec: 39182.7). Total num frames: 2695430144. Throughput: 0: 10187.1. Samples: 423855090. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:23,977][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:25,938][626795] Updated weights for policy 0, policy_version 329042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:27,888][626795] Updated weights for policy 0, policy_version 329052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:28,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40960.1, 300 sec: 39210.7). Total num frames: 2695634944. Throughput: 0: 10181.7. Samples: 423886542. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:28,976][24592] Avg episode reward: [(0, '4.368')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:29,970][626795] Updated weights for policy 0, policy_version 329062 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:31,809][626795] Updated weights for policy 0, policy_version 329072 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:34,057][24592] Fps is (10 sec: 38190.4, 60 sec: 40495.2, 300 sec: 39199.6). Total num frames: 2695815168. Throughput: 0: 10153.8. Samples: 423948564. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:34,059][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:34,610][626795] Updated weights for policy 0, policy_version 329082 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:36,656][626795] Updated weights for policy 0, policy_version 329092 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:38,647][626795] Updated weights for policy 0, policy_version 329102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:38,975][24592] Fps is (10 sec: 37682.8, 60 sec: 40278.3, 300 sec: 39210.6). Total num frames: 2696011776. Throughput: 0: 9968.7. Samples: 424001634. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:38,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:40,653][626795] Updated weights for policy 0, policy_version 329112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:42,590][626795] Updated weights for policy 0, policy_version 329122 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:43,976][24592] Fps is (10 sec: 41297.6, 60 sec: 40414.0, 300 sec: 39266.0). Total num frames: 2696224768. Throughput: 0: 9971.3. Samples: 424032366. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:43,977][24592] Avg episode reward: [(0, '4.910')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:44,634][626795] Updated weights for policy 0, policy_version 329132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:46,463][626795] Updated weights for policy 0, policy_version 329142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:48,392][626795] Updated weights for policy 0, policy_version 329152 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:48,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40277.4, 300 sec: 39266.1). Total num frames: 2696429568. Throughput: 0: 10190.7. Samples: 424095990. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:48,976][24592] Avg episode reward: [(0, '4.278')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:50,215][626795] Updated weights for policy 0, policy_version 329162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:52,262][626795] Updated weights for policy 0, policy_version 329172 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:53,975][24592] Fps is (10 sec: 41779.7, 60 sec: 40414.1, 300 sec: 39321.6). Total num frames: 2696642560. Throughput: 0: 10268.8. Samples: 424160052. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:53,978][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:54,242][626795] Updated weights for policy 0, policy_version 329182 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:56,145][626795] Updated weights for policy 0, policy_version 329192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:34:58,055][626795] Updated weights for policy 0, policy_version 329202 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:58,975][24592] Fps is (10 sec: 41778.9, 60 sec: 40413.9, 300 sec: 39321.7). Total num frames: 2696847360. Throughput: 0: 10252.3. Samples: 424190760. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:34:58,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:00,162][626795] Updated weights for policy 0, policy_version 329212 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:02,098][626795] Updated weights for policy 0, policy_version 329222 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:03,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41232.9, 300 sec: 39349.4). Total num frames: 2697060352. Throughput: 0: 10252.7. Samples: 424253100. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:03,977][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000329231_2697060352.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:04,113][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000328070_2687549440.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:04,228][626795] Updated weights for policy 0, policy_version 329232 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:06,190][626795] Updated weights for policy 0, policy_version 329242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:08,815][626795] Updated weights for policy 0, policy_version 329252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:08,975][24592] Fps is (10 sec: 38502.5, 60 sec: 40550.4, 300 sec: 39349.5). Total num frames: 2697232384. Throughput: 0: 10031.4. Samples: 424306500. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:08,978][24592] Avg episode reward: [(0, '4.470')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:10,956][626795] Updated weights for policy 0, policy_version 329262 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:12,856][626795] Updated weights for policy 0, policy_version 329272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:13,976][24592] Fps is (10 sec: 37683.7, 60 sec: 40550.3, 300 sec: 39349.4). Total num frames: 2697437184. Throughput: 0: 10001.4. Samples: 424336608. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:13,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:14,887][626795] Updated weights for policy 0, policy_version 329282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:16,805][626795] Updated weights for policy 0, policy_version 329292 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:18,737][626795] Updated weights for policy 0, policy_version 329302 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:18,976][24592] Fps is (10 sec: 41778.4, 60 sec: 40550.2, 300 sec: 39404.9). Total num frames: 2697650176. Throughput: 0: 10037.3. Samples: 424399422. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:18,976][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:20,641][626795] Updated weights for policy 0, policy_version 329312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:22,640][626795] Updated weights for policy 0, policy_version 329322 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:23,975][24592] Fps is (10 sec: 42599.0, 60 sec: 40550.5, 300 sec: 39460.5). Total num frames: 2697863168. Throughput: 0: 10259.6. Samples: 424463316. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:23,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:24,430][626795] Updated weights for policy 0, policy_version 329332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:26,507][626795] Updated weights for policy 0, policy_version 329342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:28,370][626795] Updated weights for policy 0, policy_version 329352 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:28,975][24592] Fps is (10 sec: 41780.3, 60 sec: 40550.4, 300 sec: 39543.8). Total num frames: 2698067968. Throughput: 0: 10279.2. Samples: 424494930. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:28,976][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:30,387][626795] Updated weights for policy 0, policy_version 329362 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:32,404][626795] Updated weights for policy 0, policy_version 329372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 41016.0, 300 sec: 39627.1). Total num frames: 2698272768. Throughput: 0: 10258.4. Samples: 424557618. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:33,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:34,387][626795] Updated weights for policy 0, policy_version 329382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:36,355][626795] Updated weights for policy 0, policy_version 329392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:38,291][626795] Updated weights for policy 0, policy_version 329402 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:38,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41233.0, 300 sec: 39793.8). Total num frames: 2698485760. Throughput: 0: 10209.3. Samples: 424619472. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:38,977][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:41,129][626795] Updated weights for policy 0, policy_version 329412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:43,036][626795] Updated weights for policy 0, policy_version 329422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:43,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40550.5, 300 sec: 39710.4). Total num frames: 2698657792. Throughput: 0: 10029.6. Samples: 424642092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:43,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:45,045][626795] Updated weights for policy 0, policy_version 329432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:46,958][626795] Updated weights for policy 0, policy_version 329442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:48,975][24592] Fps is (10 sec: 37683.9, 60 sec: 40550.4, 300 sec: 39738.2). Total num frames: 2698862592. Throughput: 0: 10027.8. Samples: 424704348. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:48,978][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:49,013][626795] Updated weights for policy 0, policy_version 329452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:50,892][626795] Updated weights for policy 0, policy_version 329462 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:52,982][626795] Updated weights for policy 0, policy_version 329472 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:53,976][24592] Fps is (10 sec: 41777.5, 60 sec: 40550.1, 300 sec: 39765.9). Total num frames: 2699075584. Throughput: 0: 10235.9. Samples: 424767120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:53,977][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:54,928][626795] Updated weights for policy 0, policy_version 329482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:56,913][626795] Updated weights for policy 0, policy_version 329492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:35:58,831][626795] Updated weights for policy 0, policy_version 329502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:58,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40550.4, 300 sec: 39765.9). Total num frames: 2699280384. Throughput: 0: 10243.5. Samples: 424797564. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:35:58,977][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:00,896][626795] Updated weights for policy 0, policy_version 329512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:02,675][626795] Updated weights for policy 0, policy_version 329522 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:03,977][24592] Fps is (10 sec: 41776.2, 60 sec: 40549.8, 300 sec: 39821.3). Total num frames: 2699493376. Throughput: 0: 10261.9. Samples: 424861218. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:03,979][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:04,702][626795] Updated weights for policy 0, policy_version 329532 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:06,638][626795] Updated weights for policy 0, policy_version 329542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:08,582][626795] Updated weights for policy 0, policy_version 329552 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:08,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41233.1, 300 sec: 39877.0). Total num frames: 2699706368. Throughput: 0: 10241.5. Samples: 424924182. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:08,977][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:10,594][626795] Updated weights for policy 0, policy_version 329562 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:13,333][626795] Updated weights for policy 0, policy_version 329572 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:13,975][24592] Fps is (10 sec: 38506.7, 60 sec: 40687.0, 300 sec: 39877.0). Total num frames: 2699878400. Throughput: 0: 10217.3. Samples: 424954710. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:13,977][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:15,306][626795] Updated weights for policy 0, policy_version 329582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:17,299][626795] Updated weights for policy 0, policy_version 329592 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:18,975][24592] Fps is (10 sec: 37682.9, 60 sec: 40550.5, 300 sec: 39877.0). Total num frames: 2700083200. Throughput: 0: 10032.7. Samples: 425009088. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:18,978][24592] Avg episode reward: [(0, '4.556')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:19,377][626795] Updated weights for policy 0, policy_version 329602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:21,135][626795] Updated weights for policy 0, policy_version 329612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:23,104][626795] Updated weights for policy 0, policy_version 329622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40550.4, 300 sec: 39904.9). Total num frames: 2700296192. Throughput: 0: 10056.4. Samples: 425072010. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:23,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:25,057][626795] Updated weights for policy 0, policy_version 329632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:26,935][626795] Updated weights for policy 0, policy_version 329642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:28,916][626795] Updated weights for policy 0, policy_version 329652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:28,975][24592] Fps is (10 sec: 42598.6, 60 sec: 40686.9, 300 sec: 39932.6). Total num frames: 2700509184. Throughput: 0: 10263.1. Samples: 425103930. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:28,977][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:30,872][626795] Updated weights for policy 0, policy_version 329662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:32,736][626795] Updated weights for policy 0, policy_version 329672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:33,976][24592] Fps is (10 sec: 42598.0, 60 sec: 40823.4, 300 sec: 39960.3). Total num frames: 2700722176. Throughput: 0: 10297.2. Samples: 425167722. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:33,978][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:34,687][626795] Updated weights for policy 0, policy_version 329682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:36,639][626795] Updated weights for policy 0, policy_version 329692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:38,540][626795] Updated weights for policy 0, policy_version 329702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:38,976][24592] Fps is (10 sec: 41777.5, 60 sec: 40686.8, 300 sec: 39988.1). Total num frames: 2700926976. Throughput: 0: 10320.7. Samples: 425231550. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:38,977][24592] Avg episode reward: [(0, '4.531')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:40,556][626795] Updated weights for policy 0, policy_version 329712 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:42,509][626795] Updated weights for policy 0, policy_version 329722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:43,977][24592] Fps is (10 sec: 41773.6, 60 sec: 41368.6, 300 sec: 40097.5). Total num frames: 2701139968. Throughput: 0: 10324.6. Samples: 425262186. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:43,978][24592] Avg episode reward: [(0, '4.429')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:44,536][626795] Updated weights for policy 0, policy_version 329732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:47,305][626795] Updated weights for policy 0, policy_version 329742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:48,976][24592] Fps is (10 sec: 38502.9, 60 sec: 40823.3, 300 sec: 40015.8). Total num frames: 2701312000. Throughput: 0: 10104.9. Samples: 425315928. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:48,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:49,170][626795] Updated weights for policy 0, policy_version 329752 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:51,217][626795] Updated weights for policy 0, policy_version 329762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:53,186][626795] Updated weights for policy 0, policy_version 329772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:53,975][24592] Fps is (10 sec: 38507.9, 60 sec: 40823.7, 300 sec: 40043.6). Total num frames: 2701524992. Throughput: 0: 10099.5. Samples: 425378658. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:53,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:55,110][626795] Updated weights for policy 0, policy_version 329782 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:56,999][626795] Updated weights for policy 0, policy_version 329792 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:36:58,958][626795] Updated weights for policy 0, policy_version 329802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:58,975][24592] Fps is (10 sec: 42599.4, 60 sec: 40960.0, 300 sec: 40043.6). Total num frames: 2701737984. Throughput: 0: 10125.6. Samples: 425410362. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:36:58,977][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:00,903][626795] Updated weights for policy 0, policy_version 329812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:02,696][626795] Updated weights for policy 0, policy_version 329822 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:03,975][24592] Fps is (10 sec: 42598.1, 60 sec: 40960.7, 300 sec: 40099.1). Total num frames: 2701950976. Throughput: 0: 10359.2. Samples: 425475252. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:03,976][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000329828_2701950976.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:04,059][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000328638_2692202496.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:04,777][626795] Updated weights for policy 0, policy_version 329832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:06,781][626795] Updated weights for policy 0, policy_version 329842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:08,677][626795] Updated weights for policy 0, policy_version 329852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:08,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40823.5, 300 sec: 40126.9). Total num frames: 2702155776. Throughput: 0: 10353.6. Samples: 425537922. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:08,977][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:10,735][626795] Updated weights for policy 0, policy_version 329862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:12,516][626795] Updated weights for policy 0, policy_version 329872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:13,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.1, 300 sec: 40154.7). Total num frames: 2702368768. Throughput: 0: 10331.5. Samples: 425568846. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:13,977][24592] Avg episode reward: [(0, '4.960')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:14,605][626795] Updated weights for policy 0, policy_version 329882 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:16,545][626795] Updated weights for policy 0, policy_version 329892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:19,005][24592] Fps is (10 sec: 38390.3, 60 sec: 40940.1, 300 sec: 40150.7). Total num frames: 2702540800. Throughput: 0: 10295.1. Samples: 425631300. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:19,005][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:19,395][626795] Updated weights for policy 0, policy_version 329902 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:21,309][626795] Updated weights for policy 0, policy_version 329912 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:23,274][626795] Updated weights for policy 0, policy_version 329922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:23,975][24592] Fps is (10 sec: 37683.1, 60 sec: 40823.4, 300 sec: 40182.5). Total num frames: 2702745600. Throughput: 0: 10066.1. Samples: 425684520. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:23,976][24592] Avg episode reward: [(0, '4.775')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:25,303][626795] Updated weights for policy 0, policy_version 329932 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:27,241][626795] Updated weights for policy 0, policy_version 329942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:28,976][24592] Fps is (10 sec: 41899.5, 60 sec: 40823.1, 300 sec: 40210.2). Total num frames: 2702958592. Throughput: 0: 10087.7. Samples: 425716122. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:28,977][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:29,062][626795] Updated weights for policy 0, policy_version 329952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:31,027][626795] Updated weights for policy 0, policy_version 329962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:32,904][626795] Updated weights for policy 0, policy_version 329972 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:33,976][24592] Fps is (10 sec: 42595.9, 60 sec: 40823.1, 300 sec: 40238.4). Total num frames: 2703171584. Throughput: 0: 10313.0. Samples: 425780016. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:33,982][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:34,980][626795] Updated weights for policy 0, policy_version 329982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:36,772][626795] Updated weights for policy 0, policy_version 329992 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:38,797][626795] Updated weights for policy 0, policy_version 330002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:38,975][24592] Fps is (10 sec: 42600.4, 60 sec: 40960.3, 300 sec: 40323.5). Total num frames: 2703384576. Throughput: 0: 10337.1. Samples: 425843826. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:38,977][24592] Avg episode reward: [(0, '4.529')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:40,606][626795] Updated weights for policy 0, policy_version 330012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:42,629][626795] Updated weights for policy 0, policy_version 330022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:43,975][24592] Fps is (10 sec: 42601.1, 60 sec: 40961.0, 300 sec: 40432.4). Total num frames: 2703597568. Throughput: 0: 10346.8. Samples: 425875968. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:43,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:44,604][626795] Updated weights for policy 0, policy_version 330032 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:46,580][626795] Updated weights for policy 0, policy_version 330042 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:48,426][626795] Updated weights for policy 0, policy_version 330052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:48,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41506.3, 300 sec: 40515.8). Total num frames: 2703802368. Throughput: 0: 10292.7. Samples: 425938422. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:48,977][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:50,468][626795] Updated weights for policy 0, policy_version 330062 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:53,294][626795] Updated weights for policy 0, policy_version 330072 (0.0731)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:53,975][24592] Fps is (10 sec: 37682.9, 60 sec: 40823.4, 300 sec: 40599.0). Total num frames: 2703974400. Throughput: 0: 10104.9. Samples: 425992644. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:53,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:55,242][626795] Updated weights for policy 0, policy_version 330082 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:57,031][626795] Updated weights for policy 0, policy_version 330092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:58,976][24592] Fps is (10 sec: 38501.5, 60 sec: 40823.3, 300 sec: 40626.7). Total num frames: 2704187392. Throughput: 0: 10109.2. Samples: 426023760. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:37:58,977][24592] Avg episode reward: [(0, '4.908')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:37:59,069][626795] Updated weights for policy 0, policy_version 330102 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:00,907][626795] Updated weights for policy 0, policy_version 330112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:02,794][626795] Updated weights for policy 0, policy_version 330122 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:03,975][24592] Fps is (10 sec: 43417.6, 60 sec: 40960.0, 300 sec: 40710.1). Total num frames: 2704408576. Throughput: 0: 10174.5. Samples: 426088854. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:03,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:04,751][626795] Updated weights for policy 0, policy_version 330132 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:06,613][626795] Updated weights for policy 0, policy_version 330142 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:08,485][626795] Updated weights for policy 0, policy_version 330152 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:08,976][24592] Fps is (10 sec: 43415.8, 60 sec: 41096.1, 300 sec: 40793.4). Total num frames: 2704621568. Throughput: 0: 10418.9. Samples: 426153378. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:08,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:10,501][626795] Updated weights for policy 0, policy_version 330162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:12,382][626795] Updated weights for policy 0, policy_version 330172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:13,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41096.6, 300 sec: 40793.4). Total num frames: 2704834560. Throughput: 0: 10419.4. Samples: 426184992. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:13,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:14,379][626795] Updated weights for policy 0, policy_version 330182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:16,383][626795] Updated weights for policy 0, policy_version 330192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:18,100][626795] Updated weights for policy 0, policy_version 330202 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:18,976][24592] Fps is (10 sec: 42597.8, 60 sec: 41799.0, 300 sec: 40821.1). Total num frames: 2705047552. Throughput: 0: 10415.8. Samples: 426248730. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:18,978][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:20,188][626795] Updated weights for policy 0, policy_version 330212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:22,069][626795] Updated weights for policy 0, policy_version 330222 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41779.2, 300 sec: 40932.2). Total num frames: 2705252352. Throughput: 0: 10418.3. Samples: 426312648. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:23,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:24,105][626795] Updated weights for policy 0, policy_version 330232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:26,667][626795] Updated weights for policy 0, policy_version 330242 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:28,590][626795] Updated weights for policy 0, policy_version 330252 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:28,975][24592] Fps is (10 sec: 38505.4, 60 sec: 41233.4, 300 sec: 40848.9). Total num frames: 2705432576. Throughput: 0: 10222.7. Samples: 426335988. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:28,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:30,596][626795] Updated weights for policy 0, policy_version 330262 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:32,496][626795] Updated weights for policy 0, policy_version 330272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:33,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41233.5, 300 sec: 40849.1). Total num frames: 2705645568. Throughput: 0: 10241.6. Samples: 426399294. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:33,978][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:34,484][626795] Updated weights for policy 0, policy_version 330282 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:36,365][626795] Updated weights for policy 0, policy_version 330292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:38,292][626795] Updated weights for policy 0, policy_version 330302 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:38,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41233.1, 300 sec: 40876.8). Total num frames: 2705858560. Throughput: 0: 10460.2. Samples: 426463350. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:38,977][24592] Avg episode reward: [(0, '4.958')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:40,276][626795] Updated weights for policy 0, policy_version 330312 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:42,217][626795] Updated weights for policy 0, policy_version 330322 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:43,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41233.1, 300 sec: 40876.7). Total num frames: 2706071552. Throughput: 0: 10469.3. Samples: 426494874. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:43,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:44,046][626795] Updated weights for policy 0, policy_version 330332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:46,110][626795] Updated weights for policy 0, policy_version 330342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:47,948][626795] Updated weights for policy 0, policy_version 330352 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41369.6, 300 sec: 40904.5). Total num frames: 2706284544. Throughput: 0: 10435.6. Samples: 426558456. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:48,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:49,917][626795] Updated weights for policy 0, policy_version 330362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:51,759][626795] Updated weights for policy 0, policy_version 330372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:53,678][626795] Updated weights for policy 0, policy_version 330382 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 40932.3). Total num frames: 2706497536. Throughput: 0: 10419.1. Samples: 426622230. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:53,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:55,705][626795] Updated weights for policy 0, policy_version 330392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:38:58,325][626795] Updated weights for policy 0, policy_version 330402 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:58,975][24592] Fps is (10 sec: 39321.2, 60 sec: 41506.2, 300 sec: 40987.8). Total num frames: 2706677760. Throughput: 0: 10416.8. Samples: 426653748. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:38:58,976][24592] Avg episode reward: [(0, '5.065')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:00,273][626795] Updated weights for policy 0, policy_version 330412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:02,193][626795] Updated weights for policy 0, policy_version 330422 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:03,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41233.1, 300 sec: 40960.0). Total num frames: 2706882560. Throughput: 0: 10240.2. Samples: 426709530. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:03,977][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000330431_2706890752.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:04,115][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000329231_2697060352.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:04,250][626795] Updated weights for policy 0, policy_version 330432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:06,179][626795] Updated weights for policy 0, policy_version 330442 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:08,100][626795] Updated weights for policy 0, policy_version 330452 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:08,976][24592] Fps is (10 sec: 41777.8, 60 sec: 41233.2, 300 sec: 40987.7). Total num frames: 2707095552. Throughput: 0: 10218.2. Samples: 426772470. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:08,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:09,961][626795] Updated weights for policy 0, policy_version 330462 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:11,933][626795] Updated weights for policy 0, policy_version 330472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:13,916][626795] Updated weights for policy 0, policy_version 330482 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:13,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41233.0, 300 sec: 40987.8). Total num frames: 2707308544. Throughput: 0: 10406.9. Samples: 426804300. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:13,976][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:15,797][626795] Updated weights for policy 0, policy_version 330492 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:17,651][626795] Updated weights for policy 0, policy_version 330502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:18,976][24592] Fps is (10 sec: 42598.9, 60 sec: 41233.4, 300 sec: 40987.7). Total num frames: 2707521536. Throughput: 0: 10418.7. Samples: 426868140. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:18,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:19,718][626795] Updated weights for policy 0, policy_version 330512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:21,579][626795] Updated weights for policy 0, policy_version 330522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:23,444][626795] Updated weights for policy 0, policy_version 330532 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:23,976][24592] Fps is (10 sec: 41778.0, 60 sec: 41232.8, 300 sec: 40987.7). Total num frames: 2707726336. Throughput: 0: 10414.3. Samples: 426931998. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:23,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:25,505][626795] Updated weights for policy 0, policy_version 330542 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:27,413][626795] Updated weights for policy 0, policy_version 330552 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:28,975][24592] Fps is (10 sec: 42600.0, 60 sec: 41915.8, 300 sec: 41138.0). Total num frames: 2707947520. Throughput: 0: 10404.1. Samples: 426963060. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:28,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:29,392][626795] Updated weights for policy 0, policy_version 330562 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:31,941][626795] Updated weights for policy 0, policy_version 330572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:33,874][626795] Updated weights for policy 0, policy_version 330582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:33,975][24592] Fps is (10 sec: 40141.7, 60 sec: 41369.6, 300 sec: 41071.1). Total num frames: 2708127744. Throughput: 0: 10242.6. Samples: 427019376. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:33,976][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:35,922][626795] Updated weights for policy 0, policy_version 330592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:37,728][626795] Updated weights for policy 0, policy_version 330602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:38,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41369.6, 300 sec: 41071.1). Total num frames: 2708340736. Throughput: 0: 10248.9. Samples: 427083432. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:38,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:39,629][626795] Updated weights for policy 0, policy_version 330612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:41,637][626795] Updated weights for policy 0, policy_version 330622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:43,463][626795] Updated weights for policy 0, policy_version 330632 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:43,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41369.6, 300 sec: 41098.9). Total num frames: 2708553728. Throughput: 0: 10252.8. Samples: 427115124. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:43,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:45,449][626795] Updated weights for policy 0, policy_version 330642 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:47,382][626795] Updated weights for policy 0, policy_version 330652 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41369.6, 300 sec: 41098.8). Total num frames: 2708766720. Throughput: 0: 10434.3. Samples: 427179072. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:48,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:49,420][626795] Updated weights for policy 0, policy_version 330662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:51,250][626795] Updated weights for policy 0, policy_version 330672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:53,194][626795] Updated weights for policy 0, policy_version 330682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:53,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41233.1, 300 sec: 41098.9). Total num frames: 2708971520. Throughput: 0: 10423.4. Samples: 427241520. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:53,985][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:55,113][626795] Updated weights for policy 0, policy_version 330692 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:57,128][626795] Updated weights for policy 0, policy_version 330702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:58,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41779.3, 300 sec: 41098.9). Total num frames: 2709184512. Throughput: 0: 10419.7. Samples: 427273188. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:39:58,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:39:59,043][626795] Updated weights for policy 0, policy_version 330712 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:00,923][626795] Updated weights for policy 0, policy_version 330722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:02,847][626795] Updated weights for policy 0, policy_version 330732 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:03,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41506.1, 300 sec: 41154.4). Total num frames: 2709372928. Throughput: 0: 10429.3. Samples: 427337454. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:03,976][24592] Avg episode reward: [(0, '4.868')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:05,586][626795] Updated weights for policy 0, policy_version 330742 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:07,571][626795] Updated weights for policy 0, policy_version 330752 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:08,975][24592] Fps is (10 sec: 39321.8, 60 sec: 41369.9, 300 sec: 41154.4). Total num frames: 2709577728. Throughput: 0: 10251.7. Samples: 427393320. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:08,976][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:09,475][626795] Updated weights for policy 0, policy_version 330762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:11,367][626795] Updated weights for policy 0, policy_version 330772 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:13,306][626795] Updated weights for policy 0, policy_version 330782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:13,976][24592] Fps is (10 sec: 41776.1, 60 sec: 41369.1, 300 sec: 41154.3). Total num frames: 2709790720. Throughput: 0: 10264.3. Samples: 427424964. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:13,977][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:15,256][626795] Updated weights for policy 0, policy_version 330792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:17,161][626795] Updated weights for policy 0, policy_version 330802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:18,975][24592] Fps is (10 sec: 42597.8, 60 sec: 41369.7, 300 sec: 41154.4). Total num frames: 2710003712. Throughput: 0: 10408.9. Samples: 427487778. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:18,977][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:19,078][626795] Updated weights for policy 0, policy_version 330812 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:21,029][626795] Updated weights for policy 0, policy_version 330822 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:22,950][626795] Updated weights for policy 0, policy_version 330832 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:23,975][24592] Fps is (10 sec: 42601.9, 60 sec: 41506.4, 300 sec: 41182.2). Total num frames: 2710216704. Throughput: 0: 10406.1. Samples: 427551708. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:23,977][24592] Avg episode reward: [(0, '4.916')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:24,889][626795] Updated weights for policy 0, policy_version 330842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:26,826][626795] Updated weights for policy 0, policy_version 330852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:28,851][626795] Updated weights for policy 0, policy_version 330862 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41233.0, 300 sec: 41182.1). Total num frames: 2710421504. Throughput: 0: 10413.0. Samples: 427583712. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:28,978][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:30,850][626795] Updated weights for policy 0, policy_version 330872 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:32,694][626795] Updated weights for policy 0, policy_version 330882 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:33,975][24592] Fps is (10 sec: 41778.8, 60 sec: 41779.2, 300 sec: 41182.2). Total num frames: 2710634496. Throughput: 0: 10382.9. Samples: 427646304. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:33,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:34,664][626795] Updated weights for policy 0, policy_version 330892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:37,340][626795] Updated weights for policy 0, policy_version 330902 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:38,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41233.0, 300 sec: 41209.9). Total num frames: 2710814720. Throughput: 0: 10241.1. Samples: 427702368. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:38,979][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:39,323][626795] Updated weights for policy 0, policy_version 330912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:41,177][626795] Updated weights for policy 0, policy_version 330922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:43,075][626795] Updated weights for policy 0, policy_version 330932 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:43,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2711027712. Throughput: 0: 10224.5. Samples: 427733292. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:43,977][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:45,090][626795] Updated weights for policy 0, policy_version 330942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:46,980][626795] Updated weights for policy 0, policy_version 330952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:48,926][626795] Updated weights for policy 0, policy_version 330962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:48,978][24592] Fps is (10 sec: 42587.6, 60 sec: 41231.3, 300 sec: 41237.4). Total num frames: 2711240704. Throughput: 0: 10209.0. Samples: 427796886. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:48,979][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:50,925][626795] Updated weights for policy 0, policy_version 330972 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:52,869][626795] Updated weights for policy 0, policy_version 330982 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:53,976][24592] Fps is (10 sec: 42598.0, 60 sec: 41369.5, 300 sec: 41265.4). Total num frames: 2711453696. Throughput: 0: 10384.9. Samples: 427860642. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:53,977][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:54,863][626795] Updated weights for policy 0, policy_version 330992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:56,628][626795] Updated weights for policy 0, policy_version 331002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:40:58,605][626795] Updated weights for policy 0, policy_version 331012 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:58,975][24592] Fps is (10 sec: 41790.4, 60 sec: 41233.1, 300 sec: 41237.9). Total num frames: 2711658496. Throughput: 0: 10373.8. Samples: 427891776. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:40:58,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:00,628][626795] Updated weights for policy 0, policy_version 331022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:02,535][626795] Updated weights for policy 0, policy_version 331032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:03,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41642.4, 300 sec: 41237.6). Total num frames: 2711871488. Throughput: 0: 10396.7. Samples: 427955634. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:03,979][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:04,037][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000331040_2711879680.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:04,173][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000329828_2701950976.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:04,563][626795] Updated weights for policy 0, policy_version 331042 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:06,533][626795] Updated weights for policy 0, policy_version 331052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:08,430][626795] Updated weights for policy 0, policy_version 331062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:08,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41779.2, 300 sec: 41376.5). Total num frames: 2712084480. Throughput: 0: 10367.2. Samples: 428018232. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:08,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:11,152][626795] Updated weights for policy 0, policy_version 331072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:13,039][626795] Updated weights for policy 0, policy_version 331082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:13,975][24592] Fps is (10 sec: 38503.7, 60 sec: 41097.1, 300 sec: 41265.5). Total num frames: 2712256512. Throughput: 0: 10174.5. Samples: 428041566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:13,978][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:14,994][626795] Updated weights for policy 0, policy_version 331092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:16,897][626795] Updated weights for policy 0, policy_version 331102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:18,800][626795] Updated weights for policy 0, policy_version 331112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:18,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41233.2, 300 sec: 41293.2). Total num frames: 2712477696. Throughput: 0: 10205.4. Samples: 428105544. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:18,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:20,624][626795] Updated weights for policy 0, policy_version 331122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:22,628][626795] Updated weights for policy 0, policy_version 331132 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:23,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 2712682496. Throughput: 0: 10390.7. Samples: 428169948. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:23,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:24,620][626795] Updated weights for policy 0, policy_version 331142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:26,541][626795] Updated weights for policy 0, policy_version 331152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:28,350][626795] Updated weights for policy 0, policy_version 331162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:28,976][24592] Fps is (10 sec: 41776.8, 60 sec: 41232.8, 300 sec: 41265.4). Total num frames: 2712895488. Throughput: 0: 10414.8. Samples: 428201964. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:28,977][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:30,338][626795] Updated weights for policy 0, policy_version 331172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:32,196][626795] Updated weights for policy 0, policy_version 331182 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:33,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41369.6, 300 sec: 41321.1). Total num frames: 2713116672. Throughput: 0: 10425.0. Samples: 428265984. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:33,976][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:34,230][626795] Updated weights for policy 0, policy_version 331192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:35,941][626795] Updated weights for policy 0, policy_version 331202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:37,916][626795] Updated weights for policy 0, policy_version 331212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:38,975][24592] Fps is (10 sec: 43420.0, 60 sec: 41915.8, 300 sec: 41321.2). Total num frames: 2713329664. Throughput: 0: 10423.1. Samples: 428329680. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:38,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:40,069][626795] Updated weights for policy 0, policy_version 331222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:41,806][626795] Updated weights for policy 0, policy_version 331232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:43,975][24592] Fps is (10 sec: 39321.9, 60 sec: 41369.7, 300 sec: 41348.8). Total num frames: 2713509888. Throughput: 0: 10432.5. Samples: 428361240. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:43,976][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:44,631][626795] Updated weights for policy 0, policy_version 331242 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:46,517][626795] Updated weights for policy 0, policy_version 331252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:48,362][626795] Updated weights for policy 0, policy_version 331262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:48,987][24592] Fps is (10 sec: 38458.2, 60 sec: 41227.0, 300 sec: 41319.4). Total num frames: 2713714688. Throughput: 0: 10247.3. Samples: 428416878. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:48,988][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:50,430][626795] Updated weights for policy 0, policy_version 331272 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:52,374][626795] Updated weights for policy 0, policy_version 331282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:53,976][24592] Fps is (10 sec: 42596.8, 60 sec: 41369.5, 300 sec: 41348.7). Total num frames: 2713935872. Throughput: 0: 10280.6. Samples: 428480862. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:53,978][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:54,290][626795] Updated weights for policy 0, policy_version 331292 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:56,116][626795] Updated weights for policy 0, policy_version 331302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:41:58,095][626795] Updated weights for policy 0, policy_version 331312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:58,976][24592] Fps is (10 sec: 42646.4, 60 sec: 41369.4, 300 sec: 41321.0). Total num frames: 2714140672. Throughput: 0: 10461.0. Samples: 428512314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:41:58,978][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:00,061][626795] Updated weights for policy 0, policy_version 331322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:01,546][626772] Signal inference workers to stop experience collection... (5550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:01,548][626772] Signal inference workers to resume experience collection... (5550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:01,562][626795] InferenceWorker_p0-w0: stopping experience collection (5550 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:01,565][626795] InferenceWorker_p0-w0: resuming experience collection (5550 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:02,066][626795] Updated weights for policy 0, policy_version 331332 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:03,847][626795] Updated weights for policy 0, policy_version 331342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:03,975][24592] Fps is (10 sec: 41780.3, 60 sec: 41369.8, 300 sec: 41348.8). Total num frames: 2714353664. Throughput: 0: 10441.3. Samples: 428575404. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:03,978][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:05,928][626795] Updated weights for policy 0, policy_version 331352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:07,707][626795] Updated weights for policy 0, policy_version 331362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:08,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2714558464. Throughput: 0: 10428.9. Samples: 428639250. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:08,977][24592] Avg episode reward: [(0, '4.767')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:09,726][626795] Updated weights for policy 0, policy_version 331372 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:11,624][626795] Updated weights for policy 0, policy_version 331382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:13,645][626795] Updated weights for policy 0, policy_version 331392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41915.7, 300 sec: 41463.9). Total num frames: 2714771456. Throughput: 0: 10423.7. Samples: 428671026. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:13,976][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:16,370][626795] Updated weights for policy 0, policy_version 331402 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:18,298][626795] Updated weights for policy 0, policy_version 331412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:18,975][24592] Fps is (10 sec: 38502.3, 60 sec: 41096.5, 300 sec: 41348.8). Total num frames: 2714943488. Throughput: 0: 10212.9. Samples: 428725566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:18,976][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:20,270][626795] Updated weights for policy 0, policy_version 331422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:22,176][626795] Updated weights for policy 0, policy_version 331432 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:23,975][24592] Fps is (10 sec: 38502.7, 60 sec: 41233.0, 300 sec: 41348.8). Total num frames: 2715156480. Throughput: 0: 10201.1. Samples: 428788728. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:23,976][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:24,195][626795] Updated weights for policy 0, policy_version 331442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:26,106][626795] Updated weights for policy 0, policy_version 331452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:28,017][626795] Updated weights for policy 0, policy_version 331462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:28,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41233.4, 300 sec: 41348.9). Total num frames: 2715369472. Throughput: 0: 10196.8. Samples: 428820096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:28,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:30,025][626795] Updated weights for policy 0, policy_version 331472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:31,960][626795] Updated weights for policy 0, policy_version 331482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:33,826][626795] Updated weights for policy 0, policy_version 331492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:33,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41096.6, 300 sec: 41348.8). Total num frames: 2715582464. Throughput: 0: 10373.5. Samples: 428883564. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:33,976][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:35,848][626795] Updated weights for policy 0, policy_version 331502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:37,654][626795] Updated weights for policy 0, policy_version 331512 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:38,976][24592] Fps is (10 sec: 42597.4, 60 sec: 41096.4, 300 sec: 41348.7). Total num frames: 2715795456. Throughput: 0: 10367.9. Samples: 428947416. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:38,978][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:39,726][626795] Updated weights for policy 0, policy_version 331522 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:41,700][626795] Updated weights for policy 0, policy_version 331532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:43,709][626795] Updated weights for policy 0, policy_version 331542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:43,976][24592] Fps is (10 sec: 41776.9, 60 sec: 41505.8, 300 sec: 41348.7). Total num frames: 2716000256. Throughput: 0: 10330.9. Samples: 428977206. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:43,979][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:45,619][626795] Updated weights for policy 0, policy_version 331552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:47,725][626795] Updated weights for policy 0, policy_version 331562 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:48,975][24592] Fps is (10 sec: 38503.5, 60 sec: 41104.4, 300 sec: 41376.6). Total num frames: 2716180480. Throughput: 0: 10313.4. Samples: 429039504. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:48,977][24592] Avg episode reward: [(0, '4.571')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:50,344][626795] Updated weights for policy 0, policy_version 331572 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:52,267][626795] Updated weights for policy 0, policy_version 331582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:53,975][24592] Fps is (10 sec: 38504.1, 60 sec: 40823.7, 300 sec: 41348.8). Total num frames: 2716385280. Throughput: 0: 10120.7. Samples: 429094680. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:53,977][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:54,267][626795] Updated weights for policy 0, policy_version 331592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:56,327][626795] Updated weights for policy 0, policy_version 331602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:42:58,122][626795] Updated weights for policy 0, policy_version 331612 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:58,976][24592] Fps is (10 sec: 40959.5, 60 sec: 40823.6, 300 sec: 41293.2). Total num frames: 2716590080. Throughput: 0: 10104.5. Samples: 429125730. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:42:58,977][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:00,222][626795] Updated weights for policy 0, policy_version 331622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:02,151][626795] Updated weights for policy 0, policy_version 331632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40823.5, 300 sec: 41293.3). Total num frames: 2716803072. Throughput: 0: 10279.7. Samples: 429188154. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:03,977][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000331641_2716803072.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:04,118][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000330431_2706890752.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:04,165][626795] Updated weights for policy 0, policy_version 331642 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:06,081][626795] Updated weights for policy 0, policy_version 331652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:08,072][626795] Updated weights for policy 0, policy_version 331662 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:08,976][24592] Fps is (10 sec: 42598.1, 60 sec: 40959.9, 300 sec: 41293.2). Total num frames: 2717016064. Throughput: 0: 10267.6. Samples: 429250770. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:08,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:10,105][626795] Updated weights for policy 0, policy_version 331672 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:11,904][626795] Updated weights for policy 0, policy_version 331682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:13,902][626795] Updated weights for policy 0, policy_version 331692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:13,976][24592] Fps is (10 sec: 41776.7, 60 sec: 40823.1, 300 sec: 41265.5). Total num frames: 2717220864. Throughput: 0: 10265.2. Samples: 429282036. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:13,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:15,824][626795] Updated weights for policy 0, policy_version 331702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:17,816][626795] Updated weights for policy 0, policy_version 331712 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:18,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41506.1, 300 sec: 41293.2). Total num frames: 2717433856. Throughput: 0: 10276.4. Samples: 429346002. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:18,976][24592] Avg episode reward: [(0, '4.809')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:19,675][626795] Updated weights for policy 0, policy_version 331722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:22,304][626795] Updated weights for policy 0, policy_version 331732 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:23,976][24592] Fps is (10 sec: 39320.3, 60 sec: 40959.4, 300 sec: 41293.1). Total num frames: 2717614080. Throughput: 0: 10093.2. Samples: 429401616. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:23,978][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:24,342][626795] Updated weights for policy 0, policy_version 331742 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:26,136][626795] Updated weights for policy 0, policy_version 331752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:28,101][626795] Updated weights for policy 0, policy_version 331762 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:28,976][24592] Fps is (10 sec: 38501.1, 60 sec: 40823.2, 300 sec: 41265.4). Total num frames: 2717818880. Throughput: 0: 10136.0. Samples: 429433326. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:28,978][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:30,103][626795] Updated weights for policy 0, policy_version 331772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:32,040][626795] Updated weights for policy 0, policy_version 331782 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:33,975][24592] Fps is (10 sec: 42602.2, 60 sec: 40959.9, 300 sec: 41293.2). Total num frames: 2718040064. Throughput: 0: 10142.4. Samples: 429495912. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:33,975][626795] Updated weights for policy 0, policy_version 331792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:33,977][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:35,946][626795] Updated weights for policy 0, policy_version 331802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:37,897][626795] Updated weights for policy 0, policy_version 331812 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:38,975][24592] Fps is (10 sec: 42599.9, 60 sec: 40823.6, 300 sec: 41265.5). Total num frames: 2718244864. Throughput: 0: 10336.9. Samples: 429559842. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:38,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:39,825][626795] Updated weights for policy 0, policy_version 331822 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:41,747][626795] Updated weights for policy 0, policy_version 331832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:43,740][626795] Updated weights for policy 0, policy_version 331842 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40960.3, 300 sec: 41265.5). Total num frames: 2718457856. Throughput: 0: 10349.1. Samples: 429591438. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:43,979][24592] Avg episode reward: [(0, '4.356')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:45,708][626795] Updated weights for policy 0, policy_version 331852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:47,631][626795] Updated weights for policy 0, policy_version 331862 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:48,978][24592] Fps is (10 sec: 41769.3, 60 sec: 41367.9, 300 sec: 41237.4). Total num frames: 2718662656. Throughput: 0: 10357.7. Samples: 429654276. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:48,979][24592] Avg episode reward: [(0, '4.814')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:49,543][626795] Updated weights for policy 0, policy_version 331872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:51,505][626795] Updated weights for policy 0, policy_version 331882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:53,448][626795] Updated weights for policy 0, policy_version 331892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:53,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.2, 300 sec: 41348.8). Total num frames: 2718875648. Throughput: 0: 10383.2. Samples: 429718014. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:53,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:56,198][626795] Updated weights for policy 0, policy_version 331902 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:43:58,089][626795] Updated weights for policy 0, policy_version 331912 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:58,975][24592] Fps is (10 sec: 39331.1, 60 sec: 41096.6, 300 sec: 41265.5). Total num frames: 2719055872. Throughput: 0: 10192.7. Samples: 429740700. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:43:58,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:00,077][626795] Updated weights for policy 0, policy_version 331922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:01,959][626795] Updated weights for policy 0, policy_version 331932 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:03,943][626795] Updated weights for policy 0, policy_version 331942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:03,975][24592] Fps is (10 sec: 39321.4, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 2719268864. Throughput: 0: 10190.0. Samples: 429804552. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:03,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:05,906][626795] Updated weights for policy 0, policy_version 331952 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:07,734][626795] Updated weights for policy 0, policy_version 331962 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:08,977][24592] Fps is (10 sec: 42593.2, 60 sec: 41095.8, 300 sec: 41265.3). Total num frames: 2719481856. Throughput: 0: 10369.7. Samples: 429868254. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:08,978][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:09,747][626795] Updated weights for policy 0, policy_version 331972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:11,697][626795] Updated weights for policy 0, policy_version 331982 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:13,711][626795] Updated weights for policy 0, policy_version 331992 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:13,976][24592] Fps is (10 sec: 42596.6, 60 sec: 41233.2, 300 sec: 41265.4). Total num frames: 2719694848. Throughput: 0: 10352.9. Samples: 429899208. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:13,982][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:15,631][626795] Updated weights for policy 0, policy_version 332002 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:17,539][626795] Updated weights for policy 0, policy_version 332012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:18,975][24592] Fps is (10 sec: 41784.0, 60 sec: 41096.5, 300 sec: 41265.5). Total num frames: 2719899648. Throughput: 0: 10379.3. Samples: 429962982. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:18,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:19,438][626795] Updated weights for policy 0, policy_version 332022 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:21,346][626795] Updated weights for policy 0, policy_version 332032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:23,226][626795] Updated weights for policy 0, policy_version 332042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:23,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41642.9, 300 sec: 41237.6). Total num frames: 2720112640. Throughput: 0: 10360.4. Samples: 430026066. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:23,977][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:25,286][626795] Updated weights for policy 0, policy_version 332052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:28,006][626795] Updated weights for policy 0, policy_version 332062 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:28,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41096.7, 300 sec: 41209.9). Total num frames: 2720284672. Throughput: 0: 10354.1. Samples: 430057374. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:28,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:29,970][626795] Updated weights for policy 0, policy_version 332072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:31,966][626795] Updated weights for policy 0, policy_version 332082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:33,716][626795] Updated weights for policy 0, policy_version 332092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:33,975][24592] Fps is (10 sec: 38504.8, 60 sec: 40960.0, 300 sec: 41209.9). Total num frames: 2720497664. Throughput: 0: 10196.0. Samples: 430113072. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:33,976][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:35,777][626795] Updated weights for policy 0, policy_version 332102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:37,574][626795] Updated weights for policy 0, policy_version 332112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:38,975][24592] Fps is (10 sec: 43417.3, 60 sec: 41233.0, 300 sec: 41237.7). Total num frames: 2720718848. Throughput: 0: 10191.2. Samples: 430176618. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:38,978][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:39,667][626795] Updated weights for policy 0, policy_version 332122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:41,573][626795] Updated weights for policy 0, policy_version 332132 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:43,490][626795] Updated weights for policy 0, policy_version 332142 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:43,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41096.5, 300 sec: 41209.9). Total num frames: 2720923648. Throughput: 0: 10374.8. Samples: 430207566. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:43,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:45,472][626795] Updated weights for policy 0, policy_version 332152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:47,394][626795] Updated weights for policy 0, policy_version 332162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:48,975][24592] Fps is (10 sec: 40960.5, 60 sec: 41098.2, 300 sec: 41209.9). Total num frames: 2721128448. Throughput: 0: 10364.8. Samples: 430270968. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:48,978][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:49,383][626795] Updated weights for policy 0, policy_version 332172 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:51,284][626795] Updated weights for policy 0, policy_version 332182 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:53,183][626795] Updated weights for policy 0, policy_version 332192 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:53,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41096.5, 300 sec: 41209.9). Total num frames: 2721341440. Throughput: 0: 10344.8. Samples: 430333758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:53,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:55,212][626795] Updated weights for policy 0, policy_version 332202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:57,185][626795] Updated weights for policy 0, policy_version 332212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:58,976][24592] Fps is (10 sec: 42597.3, 60 sec: 41642.5, 300 sec: 41293.2). Total num frames: 2721554432. Throughput: 0: 10361.5. Samples: 430365474. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:44:58,978][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:44:59,107][626795] Updated weights for policy 0, policy_version 332222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:01,779][626795] Updated weights for policy 0, policy_version 332232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:03,698][626795] Updated weights for policy 0, policy_version 332242 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:03,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41096.6, 300 sec: 41209.9). Total num frames: 2721734656. Throughput: 0: 10168.9. Samples: 430420584. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:03,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000332243_2721734656.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:04,112][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000331040_2711879680.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:05,803][626795] Updated weights for policy 0, policy_version 332252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:07,580][626795] Updated weights for policy 0, policy_version 332262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:08,976][24592] Fps is (10 sec: 39322.5, 60 sec: 41097.3, 300 sec: 41210.0). Total num frames: 2721947648. Throughput: 0: 10175.1. Samples: 430483938. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:08,977][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:09,699][626795] Updated weights for policy 0, policy_version 332272 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:11,554][626795] Updated weights for policy 0, policy_version 332282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:13,479][626795] Updated weights for policy 0, policy_version 332292 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:13,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.3, 300 sec: 41182.2). Total num frames: 2722152448. Throughput: 0: 10173.9. Samples: 430515198. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:13,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:15,524][626795] Updated weights for policy 0, policy_version 332302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:17,442][626795] Updated weights for policy 0, policy_version 332312 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:18,975][24592] Fps is (10 sec: 41779.6, 60 sec: 41096.6, 300 sec: 41182.2). Total num frames: 2722365440. Throughput: 0: 10336.9. Samples: 430578234. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:18,976][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:19,366][626795] Updated weights for policy 0, policy_version 332322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:21,331][626795] Updated weights for policy 0, policy_version 332332 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:23,158][626795] Updated weights for policy 0, policy_version 332342 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40960.4, 300 sec: 41182.2). Total num frames: 2722570240. Throughput: 0: 10323.4. Samples: 430641168. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:23,978][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:25,189][626795] Updated weights for policy 0, policy_version 332352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:27,068][626795] Updated weights for policy 0, policy_version 332362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:28,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41642.7, 300 sec: 41182.2). Total num frames: 2722783232. Throughput: 0: 10343.2. Samples: 430673010. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:28,978][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:29,139][626795] Updated weights for policy 0, policy_version 332372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:31,070][626795] Updated weights for policy 0, policy_version 332382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:32,967][626795] Updated weights for policy 0, policy_version 332392 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:33,978][24592] Fps is (10 sec: 40138.4, 60 sec: 41232.7, 300 sec: 41209.9). Total num frames: 2722971648. Throughput: 0: 10345.3. Samples: 430736514. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:33,981][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:35,689][626795] Updated weights for policy 0, policy_version 332402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:37,594][626795] Updated weights for policy 0, policy_version 332412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:38,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40960.1, 300 sec: 41182.2). Total num frames: 2723176448. Throughput: 0: 10184.8. Samples: 430792074. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:38,978][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:39,439][626795] Updated weights for policy 0, policy_version 332422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:41,380][626795] Updated weights for policy 0, policy_version 332432 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:43,326][626795] Updated weights for policy 0, policy_version 332442 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:43,975][24592] Fps is (10 sec: 41781.2, 60 sec: 41096.5, 300 sec: 41182.5). Total num frames: 2723389440. Throughput: 0: 10191.0. Samples: 430824066. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:43,976][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:45,270][626795] Updated weights for policy 0, policy_version 332452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:47,175][626795] Updated weights for policy 0, policy_version 332462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:48,976][24592] Fps is (10 sec: 41775.6, 60 sec: 41095.9, 300 sec: 41154.3). Total num frames: 2723594240. Throughput: 0: 10365.4. Samples: 430887036. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:48,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:49,275][626795] Updated weights for policy 0, policy_version 332472 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:51,216][626795] Updated weights for policy 0, policy_version 332482 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:53,114][626795] Updated weights for policy 0, policy_version 332492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:53,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.5, 300 sec: 41182.1). Total num frames: 2723807232. Throughput: 0: 10348.5. Samples: 430949622. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:53,978][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:55,213][626795] Updated weights for policy 0, policy_version 332502 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:57,028][626795] Updated weights for policy 0, policy_version 332512 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:45:58,923][626795] Updated weights for policy 0, policy_version 332522 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:58,976][24592] Fps is (10 sec: 42600.7, 60 sec: 41096.5, 300 sec: 41182.2). Total num frames: 2724020224. Throughput: 0: 10346.2. Samples: 430980780. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:45:58,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:00,901][626795] Updated weights for policy 0, policy_version 332532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:02,872][626795] Updated weights for policy 0, policy_version 332542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:03,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41642.7, 300 sec: 41182.2). Total num frames: 2724233216. Throughput: 0: 10366.7. Samples: 431044734. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:03,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:04,765][626795] Updated weights for policy 0, policy_version 332552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:07,435][626795] Updated weights for policy 0, policy_version 332562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:08,975][24592] Fps is (10 sec: 38503.8, 60 sec: 40960.0, 300 sec: 41182.2). Total num frames: 2724405248. Throughput: 0: 10215.7. Samples: 431100876. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:08,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:09,381][626795] Updated weights for policy 0, policy_version 332572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:11,226][626795] Updated weights for policy 0, policy_version 332582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:13,205][626795] Updated weights for policy 0, policy_version 332592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:13,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41096.5, 300 sec: 41154.4). Total num frames: 2724618240. Throughput: 0: 10216.4. Samples: 431132748. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:13,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:15,161][626795] Updated weights for policy 0, policy_version 332602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:17,010][626795] Updated weights for policy 0, policy_version 332612 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:18,976][24592] Fps is (10 sec: 42597.6, 60 sec: 41096.4, 300 sec: 41182.1). Total num frames: 2724831232. Throughput: 0: 10219.2. Samples: 431196372. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:18,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:18,996][626795] Updated weights for policy 0, policy_version 332622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:20,938][626795] Updated weights for policy 0, policy_version 332632 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:22,790][626795] Updated weights for policy 0, policy_version 332642 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:23,976][24592] Fps is (10 sec: 42595.8, 60 sec: 41232.7, 300 sec: 41182.2). Total num frames: 2725044224. Throughput: 0: 10381.1. Samples: 431259228. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:23,978][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:24,861][626795] Updated weights for policy 0, policy_version 332652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:26,840][626795] Updated weights for policy 0, policy_version 332662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:28,752][626795] Updated weights for policy 0, policy_version 332672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:28,976][24592] Fps is (10 sec: 42598.6, 60 sec: 41233.0, 300 sec: 41154.4). Total num frames: 2725257216. Throughput: 0: 10373.3. Samples: 431290866. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:28,979][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:30,668][626795] Updated weights for policy 0, policy_version 332682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:32,576][626795] Updated weights for policy 0, policy_version 332692 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:33,975][24592] Fps is (10 sec: 42600.7, 60 sec: 41643.1, 300 sec: 41154.4). Total num frames: 2725470208. Throughput: 0: 10393.7. Samples: 431354742. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:33,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:34,539][626795] Updated weights for policy 0, policy_version 332702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:36,475][626795] Updated weights for policy 0, policy_version 332712 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:38,388][626795] Updated weights for policy 0, policy_version 332722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:38,975][24592] Fps is (10 sec: 42599.0, 60 sec: 41779.2, 300 sec: 41265.5). Total num frames: 2725683200. Throughput: 0: 10418.7. Samples: 431418462. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:38,976][24592] Avg episode reward: [(0, '4.819')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:41,168][626795] Updated weights for policy 0, policy_version 332732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:43,027][626795] Updated weights for policy 0, policy_version 332742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:43,975][24592] Fps is (10 sec: 38502.6, 60 sec: 41096.6, 300 sec: 41156.0). Total num frames: 2725855232. Throughput: 0: 10232.2. Samples: 431441226. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:43,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:45,040][626795] Updated weights for policy 0, policy_version 332752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:46,914][626795] Updated weights for policy 0, policy_version 332762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:48,845][626795] Updated weights for policy 0, policy_version 332772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:48,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41233.7, 300 sec: 41126.7). Total num frames: 2726068224. Throughput: 0: 10221.6. Samples: 431504706. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:48,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:50,868][626795] Updated weights for policy 0, policy_version 332782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:52,672][626795] Updated weights for policy 0, policy_version 332792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:53,975][24592] Fps is (10 sec: 43417.1, 60 sec: 41369.6, 300 sec: 41182.2). Total num frames: 2726289408. Throughput: 0: 10408.1. Samples: 431569242. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:53,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:54,688][626795] Updated weights for policy 0, policy_version 332802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:56,540][626795] Updated weights for policy 0, policy_version 332812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:46:58,453][626795] Updated weights for policy 0, policy_version 332822 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41233.3, 300 sec: 41154.4). Total num frames: 2726494208. Throughput: 0: 10406.2. Samples: 431601030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:46:58,977][24592] Avg episode reward: [(0, '5.026')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:00,488][626795] Updated weights for policy 0, policy_version 332832 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:02,349][626795] Updated weights for policy 0, policy_version 332842 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:03,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41232.8, 300 sec: 41182.1). Total num frames: 2726707200. Throughput: 0: 10410.8. Samples: 431664858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:03,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000332850_2726707200.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:04,132][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000331641_2716803072.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:04,384][626795] Updated weights for policy 0, policy_version 332852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:06,267][626795] Updated weights for policy 0, policy_version 332862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:08,253][626795] Updated weights for policy 0, policy_version 332872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41915.7, 300 sec: 41182.2). Total num frames: 2726920192. Throughput: 0: 10399.5. Samples: 431727198. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:08,977][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:10,150][626795] Updated weights for policy 0, policy_version 332882 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:12,888][626795] Updated weights for policy 0, policy_version 332892 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:13,975][24592] Fps is (10 sec: 39323.0, 60 sec: 41369.6, 300 sec: 41209.9). Total num frames: 2727100416. Throughput: 0: 10395.5. Samples: 431758662. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:13,976][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:14,821][626795] Updated weights for policy 0, policy_version 332902 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:16,709][626795] Updated weights for policy 0, policy_version 332912 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:18,605][626795] Updated weights for policy 0, policy_version 332922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:18,975][24592] Fps is (10 sec: 38502.1, 60 sec: 41233.2, 300 sec: 41182.1). Total num frames: 2727305216. Throughput: 0: 10215.2. Samples: 431814426. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:18,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:20,584][626795] Updated weights for policy 0, policy_version 332932 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:22,550][626795] Updated weights for policy 0, policy_version 332942 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41233.5, 300 sec: 41182.2). Total num frames: 2727518208. Throughput: 0: 10226.3. Samples: 431878644. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:23,976][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:24,500][626795] Updated weights for policy 0, policy_version 332952 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:26,296][626795] Updated weights for policy 0, policy_version 332962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:28,324][626795] Updated weights for policy 0, policy_version 332972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:28,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41233.2, 300 sec: 41182.1). Total num frames: 2727731200. Throughput: 0: 10410.0. Samples: 431909676. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:28,977][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:30,271][626795] Updated weights for policy 0, policy_version 332982 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:32,326][626795] Updated weights for policy 0, policy_version 332992 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:33,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41233.1, 300 sec: 41182.2). Total num frames: 2727944192. Throughput: 0: 10399.5. Samples: 431972682. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:33,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:34,134][626795] Updated weights for policy 0, policy_version 333002 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:36,065][626795] Updated weights for policy 0, policy_version 333012 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:38,002][626795] Updated weights for policy 0, policy_version 333022 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:38,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41233.1, 300 sec: 41210.0). Total num frames: 2728157184. Throughput: 0: 10390.5. Samples: 432036816. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:38,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:39,965][626795] Updated weights for policy 0, policy_version 333032 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:41,886][626795] Updated weights for policy 0, policy_version 333042 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:43,799][626795] Updated weights for policy 0, policy_version 333052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41915.7, 300 sec: 41321.0). Total num frames: 2728370176. Throughput: 0: 10388.8. Samples: 432068526. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:43,977][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:46,435][626795] Updated weights for policy 0, policy_version 333062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:48,405][626795] Updated weights for policy 0, policy_version 333072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:48,975][24592] Fps is (10 sec: 38502.6, 60 sec: 41233.1, 300 sec: 41209.9). Total num frames: 2728542208. Throughput: 0: 10203.5. Samples: 432124014. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:48,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:50,398][626795] Updated weights for policy 0, policy_version 333082 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:52,228][626795] Updated weights for policy 0, policy_version 333092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:53,975][24592] Fps is (10 sec: 38502.3, 60 sec: 41096.6, 300 sec: 41237.7). Total num frames: 2728755200. Throughput: 0: 10236.1. Samples: 432187824. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:53,977][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:54,229][626795] Updated weights for policy 0, policy_version 333102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:56,223][626795] Updated weights for policy 0, policy_version 333112 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:58,113][626795] Updated weights for policy 0, policy_version 333122 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:58,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41233.1, 300 sec: 41237.7). Total num frames: 2728968192. Throughput: 0: 10228.1. Samples: 432218928. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:47:58,977][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:47:59,943][626795] Updated weights for policy 0, policy_version 333132 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:01,954][626795] Updated weights for policy 0, policy_version 333142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:03,818][626795] Updated weights for policy 0, policy_version 333152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:03,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41233.3, 300 sec: 41237.7). Total num frames: 2729181184. Throughput: 0: 10417.6. Samples: 432283218. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:03,977][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:05,739][626795] Updated weights for policy 0, policy_version 333162 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:07,692][626795] Updated weights for policy 0, policy_version 333172 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:08,976][24592] Fps is (10 sec: 42596.6, 60 sec: 41232.8, 300 sec: 41265.5). Total num frames: 2729394176. Throughput: 0: 10414.8. Samples: 432347316. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:08,978][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:09,526][626795] Updated weights for policy 0, policy_version 333182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:11,550][626795] Updated weights for policy 0, policy_version 333192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:13,543][626795] Updated weights for policy 0, policy_version 333202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:13,978][24592] Fps is (10 sec: 41767.3, 60 sec: 41640.6, 300 sec: 41237.3). Total num frames: 2729598976. Throughput: 0: 10429.7. Samples: 432379044. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:13,980][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:15,526][626795] Updated weights for policy 0, policy_version 333212 (0.0036)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:17,469][626795] Updated weights for policy 0, policy_version 333222 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:18,976][24592] Fps is (10 sec: 38503.5, 60 sec: 41233.0, 300 sec: 41237.8). Total num frames: 2729779200. Throughput: 0: 10412.5. Samples: 432441246. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:18,978][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:20,322][626795] Updated weights for policy 0, policy_version 333232 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:22,142][626795] Updated weights for policy 0, policy_version 333242 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:23,975][24592] Fps is (10 sec: 39332.4, 60 sec: 41233.0, 300 sec: 41265.5). Total num frames: 2729992192. Throughput: 0: 10212.5. Samples: 432496380. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:23,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:24,149][626795] Updated weights for policy 0, policy_version 333252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:26,090][626795] Updated weights for policy 0, policy_version 333262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:27,882][626795] Updated weights for policy 0, policy_version 333272 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:28,975][24592] Fps is (10 sec: 42599.1, 60 sec: 41233.1, 300 sec: 41237.7). Total num frames: 2730205184. Throughput: 0: 10213.9. Samples: 432528150. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:28,976][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:29,840][626795] Updated weights for policy 0, policy_version 333282 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:31,665][626795] Updated weights for policy 0, policy_version 333292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:33,725][626795] Updated weights for policy 0, policy_version 333302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:33,976][24592] Fps is (10 sec: 43416.0, 60 sec: 41369.3, 300 sec: 41293.2). Total num frames: 2730426368. Throughput: 0: 10403.9. Samples: 432592194. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:33,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:35,597][626795] Updated weights for policy 0, policy_version 333312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:37,364][626795] Updated weights for policy 0, policy_version 333322 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:38,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2730639360. Throughput: 0: 10434.3. Samples: 432657366. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:38,978][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:39,391][626795] Updated weights for policy 0, policy_version 333332 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:41,246][626795] Updated weights for policy 0, policy_version 333342 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:43,154][626795] Updated weights for policy 0, policy_version 333352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:43,975][24592] Fps is (10 sec: 42600.4, 60 sec: 41369.6, 300 sec: 41321.3). Total num frames: 2730852352. Throughput: 0: 10448.5. Samples: 432689112. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:43,977][24592] Avg episode reward: [(0, '4.366')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:45,106][626795] Updated weights for policy 0, policy_version 333362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:47,042][626795] Updated weights for policy 0, policy_version 333372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:48,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.7, 300 sec: 41293.2). Total num frames: 2731057152. Throughput: 0: 10451.9. Samples: 432753552. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:48,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:49,051][626795] Updated weights for policy 0, policy_version 333382 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:50,905][626795] Updated weights for policy 0, policy_version 333392 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:53,515][626795] Updated weights for policy 0, policy_version 333402 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:53,975][24592] Fps is (10 sec: 38502.5, 60 sec: 41369.6, 300 sec: 41293.2). Total num frames: 2731237376. Throughput: 0: 10278.4. Samples: 432809838. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:53,977][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:55,438][626795] Updated weights for policy 0, policy_version 333412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:57,392][626795] Updated weights for policy 0, policy_version 333422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:58,975][24592] Fps is (10 sec: 40140.7, 60 sec: 41506.1, 300 sec: 41321.0). Total num frames: 2731458560. Throughput: 0: 10261.4. Samples: 432840780. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:48:58,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:48:59,359][626795] Updated weights for policy 0, policy_version 333432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:01,155][626795] Updated weights for policy 0, policy_version 333442 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:03,138][626795] Updated weights for policy 0, policy_version 333452 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:03,975][24592] Fps is (10 sec: 43417.2, 60 sec: 41506.1, 300 sec: 41321.2). Total num frames: 2731671552. Throughput: 0: 10309.2. Samples: 432905160. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:03,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:04,009][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000333457_2731679744.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:04,171][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000332243_2721734656.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:05,087][626795] Updated weights for policy 0, policy_version 333462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:07,016][626795] Updated weights for policy 0, policy_version 333472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:08,827][626795] Updated weights for policy 0, policy_version 333482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41506.4, 300 sec: 41321.1). Total num frames: 2731884544. Throughput: 0: 10514.8. Samples: 432969546. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:08,976][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:09,626][626772] Signal inference workers to stop experience collection... (5600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:09,626][626772] Signal inference workers to resume experience collection... (5600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:09,635][626795] InferenceWorker_p0-w0: stopping experience collection (5600 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:09,643][626795] InferenceWorker_p0-w0: resuming experience collection (5600 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:10,825][626795] Updated weights for policy 0, policy_version 333492 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:12,755][626795] Updated weights for policy 0, policy_version 333502 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:13,976][24592] Fps is (10 sec: 42597.6, 60 sec: 41644.5, 300 sec: 41348.7). Total num frames: 2732097536. Throughput: 0: 10506.2. Samples: 433000932. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:13,977][24592] Avg episode reward: [(0, '4.437')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:14,743][626795] Updated weights for policy 0, policy_version 333512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:16,622][626795] Updated weights for policy 0, policy_version 333522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:18,458][626795] Updated weights for policy 0, policy_version 333532 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:18,976][24592] Fps is (10 sec: 42596.3, 60 sec: 42188.5, 300 sec: 41348.8). Total num frames: 2732310528. Throughput: 0: 10497.2. Samples: 433064568. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:18,977][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:20,472][626795] Updated weights for policy 0, policy_version 333542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:22,327][626795] Updated weights for policy 0, policy_version 333552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:23,976][24592] Fps is (10 sec: 42598.8, 60 sec: 42188.8, 300 sec: 41487.6). Total num frames: 2732523520. Throughput: 0: 10487.6. Samples: 433129308. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:23,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:25,010][626795] Updated weights for policy 0, policy_version 333562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:26,882][626795] Updated weights for policy 0, policy_version 333572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:28,907][626795] Updated weights for policy 0, policy_version 333582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:28,975][24592] Fps is (10 sec: 39323.6, 60 sec: 41642.6, 300 sec: 41376.5). Total num frames: 2732703744. Throughput: 0: 10311.5. Samples: 433153128. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:28,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:30,765][626795] Updated weights for policy 0, policy_version 333592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:32,675][626795] Updated weights for policy 0, policy_version 333602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:33,975][24592] Fps is (10 sec: 39322.3, 60 sec: 41506.5, 300 sec: 41348.8). Total num frames: 2732916736. Throughput: 0: 10317.2. Samples: 433217826. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:33,977][24592] Avg episode reward: [(0, '5.009')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:34,626][626795] Updated weights for policy 0, policy_version 333612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:36,522][626795] Updated weights for policy 0, policy_version 333622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:38,368][626795] Updated weights for policy 0, policy_version 333632 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:38,975][24592] Fps is (10 sec: 43417.5, 60 sec: 41642.7, 300 sec: 41404.3). Total num frames: 2733137920. Throughput: 0: 10488.0. Samples: 433281798. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:38,978][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:40,348][626795] Updated weights for policy 0, policy_version 333642 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:42,191][626795] Updated weights for policy 0, policy_version 333652 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:43,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41642.7, 300 sec: 41432.1). Total num frames: 2733350912. Throughput: 0: 10514.0. Samples: 433313910. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:43,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:44,286][626795] Updated weights for policy 0, policy_version 333662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:46,128][626795] Updated weights for policy 0, policy_version 333672 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:48,016][626795] Updated weights for policy 0, policy_version 333682 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:48,976][24592] Fps is (10 sec: 41778.4, 60 sec: 41642.5, 300 sec: 41404.3). Total num frames: 2733555712. Throughput: 0: 10511.0. Samples: 433378158. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:48,987][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:49,848][626795] Updated weights for policy 0, policy_version 333692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:51,831][626795] Updated weights for policy 0, policy_version 333702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:53,689][626795] Updated weights for policy 0, policy_version 333712 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:53,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42325.3, 300 sec: 41432.1). Total num frames: 2733776896. Throughput: 0: 10505.2. Samples: 433442280. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:53,976][24592] Avg episode reward: [(0, '4.823')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:55,686][626795] Updated weights for policy 0, policy_version 333722 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:49:58,296][626795] Updated weights for policy 0, policy_version 333732 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:58,975][24592] Fps is (10 sec: 40141.5, 60 sec: 41642.7, 300 sec: 41432.1). Total num frames: 2733957120. Throughput: 0: 10502.5. Samples: 433473540. Policy #0 lag: (min: 0.0, avg: 1.9, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:49:58,977][24592] Avg episode reward: [(0, '4.540')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:00,428][626795] Updated weights for policy 0, policy_version 333742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:02,267][626795] Updated weights for policy 0, policy_version 333752 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:03,975][24592] Fps is (10 sec: 38502.7, 60 sec: 41506.2, 300 sec: 41404.3). Total num frames: 2734161920. Throughput: 0: 10308.1. Samples: 433528428. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:03,977][24592] Avg episode reward: [(0, '4.734')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:04,212][626795] Updated weights for policy 0, policy_version 333762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:06,144][626795] Updated weights for policy 0, policy_version 333772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:08,097][626795] Updated weights for policy 0, policy_version 333782 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:08,993][24592] Fps is (10 sec: 41706.2, 60 sec: 41494.0, 300 sec: 41429.6). Total num frames: 2734374912. Throughput: 0: 10282.3. Samples: 433592190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:08,995][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:10,014][626795] Updated weights for policy 0, policy_version 333792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:11,903][626795] Updated weights for policy 0, policy_version 333802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:13,937][626795] Updated weights for policy 0, policy_version 333812 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:13,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.3, 300 sec: 41432.1). Total num frames: 2734587904. Throughput: 0: 10467.7. Samples: 433624176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:13,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:15,767][626795] Updated weights for policy 0, policy_version 333822 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:17,609][626795] Updated weights for policy 0, policy_version 333832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:18,975][24592] Fps is (10 sec: 43493.8, 60 sec: 41643.0, 300 sec: 41487.6). Total num frames: 2734809088. Throughput: 0: 10450.1. Samples: 433688082. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:18,976][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:19,664][626795] Updated weights for policy 0, policy_version 333842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:21,460][626795] Updated weights for policy 0, policy_version 333852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:23,371][626795] Updated weights for policy 0, policy_version 333862 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:23,976][24592] Fps is (10 sec: 42596.5, 60 sec: 41505.9, 300 sec: 41459.8). Total num frames: 2735013888. Throughput: 0: 10468.6. Samples: 433752888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:23,977][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:25,429][626795] Updated weights for policy 0, policy_version 333872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:27,278][626795] Updated weights for policy 0, policy_version 333882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:28,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42188.8, 300 sec: 41571.0). Total num frames: 2735235072. Throughput: 0: 10444.7. Samples: 433783920. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:28,977][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:29,325][626795] Updated weights for policy 0, policy_version 333892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:31,947][626795] Updated weights for policy 0, policy_version 333902 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:33,818][626795] Updated weights for policy 0, policy_version 333912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:33,975][24592] Fps is (10 sec: 39323.2, 60 sec: 41506.1, 300 sec: 41459.9). Total num frames: 2735407104. Throughput: 0: 10258.7. Samples: 433839798. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:33,976][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:35,741][626795] Updated weights for policy 0, policy_version 333922 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:37,685][626795] Updated weights for policy 0, policy_version 333932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:38,975][24592] Fps is (10 sec: 38502.3, 60 sec: 41369.6, 300 sec: 41459.9). Total num frames: 2735620096. Throughput: 0: 10266.3. Samples: 433904262. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:38,976][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:39,568][626795] Updated weights for policy 0, policy_version 333942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:41,474][626795] Updated weights for policy 0, policy_version 333952 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:43,354][626795] Updated weights for policy 0, policy_version 333962 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:43,976][24592] Fps is (10 sec: 42597.1, 60 sec: 41369.4, 300 sec: 41487.7). Total num frames: 2735833088. Throughput: 0: 10272.9. Samples: 433935822. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:43,978][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:45,382][626795] Updated weights for policy 0, policy_version 333972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:47,361][626795] Updated weights for policy 0, policy_version 333982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:48,976][24592] Fps is (10 sec: 42597.0, 60 sec: 41506.1, 300 sec: 41487.6). Total num frames: 2736046080. Throughput: 0: 10444.9. Samples: 433998450. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:48,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:49,321][626795] Updated weights for policy 0, policy_version 333992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:51,210][626795] Updated weights for policy 0, policy_version 334002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:53,204][626795] Updated weights for policy 0, policy_version 334012 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:53,975][24592] Fps is (10 sec: 41780.7, 60 sec: 41233.1, 300 sec: 41459.9). Total num frames: 2736250880. Throughput: 0: 10437.1. Samples: 434061678. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:53,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:55,195][626795] Updated weights for policy 0, policy_version 334022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:57,243][626795] Updated weights for policy 0, policy_version 334032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:58,975][24592] Fps is (10 sec: 41780.2, 60 sec: 41779.2, 300 sec: 41459.8). Total num frames: 2736463872. Throughput: 0: 10414.5. Samples: 434092830. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:50:58,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:50:59,052][626795] Updated weights for policy 0, policy_version 334042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:01,102][626795] Updated weights for policy 0, policy_version 334052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:02,913][626795] Updated weights for policy 0, policy_version 334062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:03,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41369.6, 300 sec: 41487.6). Total num frames: 2736644096. Throughput: 0: 10414.2. Samples: 434156724. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:03,977][24592] Avg episode reward: [(0, '4.465')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000334063_2736644096.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:04,110][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000332850_2726707200.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:05,736][626795] Updated weights for policy 0, policy_version 334072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:07,527][626795] Updated weights for policy 0, policy_version 334082 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:08,976][24592] Fps is (10 sec: 39320.5, 60 sec: 41381.4, 300 sec: 41487.6). Total num frames: 2736857088. Throughput: 0: 10201.6. Samples: 434211960. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:08,978][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:09,602][626795] Updated weights for policy 0, policy_version 334092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:11,482][626795] Updated weights for policy 0, policy_version 334102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:13,389][626795] Updated weights for policy 0, policy_version 334112 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:13,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41369.5, 300 sec: 41487.6). Total num frames: 2737070080. Throughput: 0: 10210.5. Samples: 434243394. Policy #0 lag: (min: 0.0, avg: 2.4, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:13,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:15,229][626795] Updated weights for policy 0, policy_version 334122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:17,267][626795] Updated weights for policy 0, policy_version 334132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:18,976][24592] Fps is (10 sec: 42596.6, 60 sec: 41232.5, 300 sec: 41487.6). Total num frames: 2737283072. Throughput: 0: 10389.2. Samples: 434307318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:18,977][24592] Avg episode reward: [(0, '4.433')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:19,134][626795] Updated weights for policy 0, policy_version 334142 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:21,141][626795] Updated weights for policy 0, policy_version 334152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:22,881][626795] Updated weights for policy 0, policy_version 334162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:23,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41233.4, 300 sec: 41459.9). Total num frames: 2737487872. Throughput: 0: 10384.5. Samples: 434371566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:23,979][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:24,873][626795] Updated weights for policy 0, policy_version 334172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:26,857][626795] Updated weights for policy 0, policy_version 334182 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:28,691][626795] Updated weights for policy 0, policy_version 334192 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:28,975][24592] Fps is (10 sec: 42601.4, 60 sec: 41233.0, 300 sec: 41487.6). Total num frames: 2737709056. Throughput: 0: 10391.5. Samples: 434403438. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:28,976][24592] Avg episode reward: [(0, '4.395')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:30,551][626795] Updated weights for policy 0, policy_version 334202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:32,470][626795] Updated weights for policy 0, policy_version 334212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:33,976][24592] Fps is (10 sec: 44232.8, 60 sec: 42051.6, 300 sec: 41515.3). Total num frames: 2737930240. Throughput: 0: 10440.9. Samples: 434468298. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:33,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:34,457][626795] Updated weights for policy 0, policy_version 334222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:37,022][626795] Updated weights for policy 0, policy_version 334232 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:38,946][626795] Updated weights for policy 0, policy_version 334242 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:38,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41506.1, 300 sec: 41543.2). Total num frames: 2738110464. Throughput: 0: 10293.2. Samples: 434524872. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:38,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:40,884][626795] Updated weights for policy 0, policy_version 334252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:42,884][626795] Updated weights for policy 0, policy_version 334262 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:43,975][24592] Fps is (10 sec: 39325.0, 60 sec: 41506.3, 300 sec: 41543.2). Total num frames: 2738323456. Throughput: 0: 10296.0. Samples: 434556150. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:43,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:44,764][626795] Updated weights for policy 0, policy_version 334272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:46,707][626795] Updated weights for policy 0, policy_version 334282 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:48,630][626795] Updated weights for policy 0, policy_version 334292 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:48,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41506.3, 300 sec: 41515.4). Total num frames: 2738536448. Throughput: 0: 10302.3. Samples: 434620326. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:48,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:50,496][626795] Updated weights for policy 0, policy_version 334302 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:52,490][626795] Updated weights for policy 0, policy_version 334312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:53,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41506.1, 300 sec: 41515.4). Total num frames: 2738741248. Throughput: 0: 10501.9. Samples: 434684544. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:53,976][24592] Avg episode reward: [(0, '4.424')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:54,435][626795] Updated weights for policy 0, policy_version 334322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:56,366][626795] Updated weights for policy 0, policy_version 334332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:51:58,259][626795] Updated weights for policy 0, policy_version 334342 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:58,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41642.5, 300 sec: 41543.2). Total num frames: 2738962432. Throughput: 0: 10514.2. Samples: 434716536. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:51:58,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:00,169][626795] Updated weights for policy 0, policy_version 334352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:01,960][626795] Updated weights for policy 0, policy_version 334362 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:03,857][626795] Updated weights for policy 0, policy_version 334372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:03,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42188.8, 300 sec: 41543.2). Total num frames: 2739175424. Throughput: 0: 10527.8. Samples: 434781060. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:03,980][24592] Avg episode reward: [(0, '4.836')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:05,765][626795] Updated weights for policy 0, policy_version 334382 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:07,688][626795] Updated weights for policy 0, policy_version 334392 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:08,975][24592] Fps is (10 sec: 42599.5, 60 sec: 42189.0, 300 sec: 41654.2). Total num frames: 2739388416. Throughput: 0: 10541.3. Samples: 434845926. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:08,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:10,374][626795] Updated weights for policy 0, policy_version 334402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:12,273][626795] Updated weights for policy 0, policy_version 334412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:13,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41779.2, 300 sec: 41598.7). Total num frames: 2739576832. Throughput: 0: 10369.2. Samples: 434870052. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:13,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:14,124][626795] Updated weights for policy 0, policy_version 334422 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:16,041][626795] Updated weights for policy 0, policy_version 334432 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:17,915][626795] Updated weights for policy 0, policy_version 334442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:18,976][24592] Fps is (10 sec: 40138.4, 60 sec: 41779.3, 300 sec: 41598.6). Total num frames: 2739789824. Throughput: 0: 10357.8. Samples: 434934396. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:18,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:19,861][626795] Updated weights for policy 0, policy_version 334452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:21,770][626795] Updated weights for policy 0, policy_version 334462 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:23,573][626795] Updated weights for policy 0, policy_version 334472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:23,976][24592] Fps is (10 sec: 42598.0, 60 sec: 41915.6, 300 sec: 41598.7). Total num frames: 2740002816. Throughput: 0: 10545.6. Samples: 434999424. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:23,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:25,551][626795] Updated weights for policy 0, policy_version 334482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:27,365][626795] Updated weights for policy 0, policy_version 334492 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:28,981][24592] Fps is (10 sec: 43393.7, 60 sec: 41911.5, 300 sec: 41625.6). Total num frames: 2740224000. Throughput: 0: 10564.1. Samples: 435031596. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:28,982][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:29,367][626795] Updated weights for policy 0, policy_version 334502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:31,137][626795] Updated weights for policy 0, policy_version 334512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:33,098][626795] Updated weights for policy 0, policy_version 334522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:33,975][24592] Fps is (10 sec: 43418.6, 60 sec: 41779.9, 300 sec: 41626.5). Total num frames: 2740436992. Throughput: 0: 10589.9. Samples: 435096870. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:33,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:35,071][626795] Updated weights for policy 0, policy_version 334532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:37,019][626795] Updated weights for policy 0, policy_version 334542 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:38,783][626795] Updated weights for policy 0, policy_version 334552 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:38,975][24592] Fps is (10 sec: 43444.1, 60 sec: 42461.9, 300 sec: 41654.2). Total num frames: 2740658176. Throughput: 0: 10597.7. Samples: 435161442. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:38,976][24592] Avg episode reward: [(0, '4.844')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:40,702][626795] Updated weights for policy 0, policy_version 334562 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:43,292][626795] Updated weights for policy 0, policy_version 334572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:43,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41915.8, 300 sec: 41682.0). Total num frames: 2740838400. Throughput: 0: 10611.0. Samples: 435194028. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:43,976][24592] Avg episode reward: [(0, '4.880')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:45,373][626795] Updated weights for policy 0, policy_version 334582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:47,195][626795] Updated weights for policy 0, policy_version 334592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:48,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41915.7, 300 sec: 41682.0). Total num frames: 2741051392. Throughput: 0: 10422.8. Samples: 435250086. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:48,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:49,191][626795] Updated weights for policy 0, policy_version 334602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:50,981][626795] Updated weights for policy 0, policy_version 334612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:53,000][626795] Updated weights for policy 0, policy_version 334622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:53,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.3, 300 sec: 41682.0). Total num frames: 2741264384. Throughput: 0: 10406.9. Samples: 435314238. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:53,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:54,780][626795] Updated weights for policy 0, policy_version 334632 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:56,733][626795] Updated weights for policy 0, policy_version 334642 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:52:58,568][626795] Updated weights for policy 0, policy_version 334652 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:58,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42052.4, 300 sec: 41709.8). Total num frames: 2741485568. Throughput: 0: 10599.1. Samples: 435347010. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:52:58,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:00,597][626795] Updated weights for policy 0, policy_version 334662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:02,382][626795] Updated weights for policy 0, policy_version 334672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:03,976][24592] Fps is (10 sec: 43415.4, 60 sec: 42052.0, 300 sec: 41709.8). Total num frames: 2741698560. Throughput: 0: 10616.2. Samples: 435412122. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:03,979][24592] Avg episode reward: [(0, '4.846')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000334680_2741698560.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:04,063][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000333457_2731679744.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:04,326][626795] Updated weights for policy 0, policy_version 334682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:06,357][626795] Updated weights for policy 0, policy_version 334692 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:08,238][626795] Updated weights for policy 0, policy_version 334702 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:08,976][24592] Fps is (10 sec: 41777.2, 60 sec: 41915.4, 300 sec: 41710.1). Total num frames: 2741903360. Throughput: 0: 10546.5. Samples: 435474018. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:08,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:10,263][626795] Updated weights for policy 0, policy_version 334712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:12,181][626795] Updated weights for policy 0, policy_version 334722 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:13,975][24592] Fps is (10 sec: 40961.7, 60 sec: 42188.8, 300 sec: 41793.1). Total num frames: 2742108160. Throughput: 0: 10537.3. Samples: 435505710. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:13,978][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:14,321][626795] Updated weights for policy 0, policy_version 334732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:16,833][626795] Updated weights for policy 0, policy_version 334742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:18,747][626795] Updated weights for policy 0, policy_version 334752 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:18,977][24592] Fps is (10 sec: 39318.9, 60 sec: 41778.8, 300 sec: 41709.6). Total num frames: 2742296576. Throughput: 0: 10307.3. Samples: 435560712. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:18,978][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:20,716][626795] Updated weights for policy 0, policy_version 334762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:22,709][626795] Updated weights for policy 0, policy_version 334772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:23,976][24592] Fps is (10 sec: 39317.5, 60 sec: 41642.1, 300 sec: 41681.9). Total num frames: 2742501376. Throughput: 0: 10297.6. Samples: 435624846. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:23,978][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:24,585][626795] Updated weights for policy 0, policy_version 334782 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:26,522][626795] Updated weights for policy 0, policy_version 334792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:28,473][626795] Updated weights for policy 0, policy_version 334802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:28,975][24592] Fps is (10 sec: 41784.0, 60 sec: 41510.3, 300 sec: 41654.3). Total num frames: 2742714368. Throughput: 0: 10263.2. Samples: 435655872. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:28,976][24592] Avg episode reward: [(0, '4.275')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:30,385][626795] Updated weights for policy 0, policy_version 334812 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:32,234][626795] Updated weights for policy 0, policy_version 334822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:33,975][24592] Fps is (10 sec: 42602.8, 60 sec: 41506.1, 300 sec: 41654.2). Total num frames: 2742927360. Throughput: 0: 10446.0. Samples: 435720156. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:33,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:34,190][626795] Updated weights for policy 0, policy_version 334832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:36,141][626795] Updated weights for policy 0, policy_version 334842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:37,940][626795] Updated weights for policy 0, policy_version 334852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:38,978][24592] Fps is (10 sec: 43405.9, 60 sec: 41504.3, 300 sec: 41681.6). Total num frames: 2743148544. Throughput: 0: 10452.8. Samples: 435784644. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:38,979][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:39,910][626795] Updated weights for policy 0, policy_version 334862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:41,857][626795] Updated weights for policy 0, policy_version 334872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:43,660][626795] Updated weights for policy 0, policy_version 334882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:43,976][24592] Fps is (10 sec: 43416.3, 60 sec: 42052.0, 300 sec: 41709.7). Total num frames: 2743361536. Throughput: 0: 10446.6. Samples: 435817110. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:43,977][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:45,570][626795] Updated weights for policy 0, policy_version 334892 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:47,531][626795] Updated weights for policy 0, policy_version 334902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:48,975][24592] Fps is (10 sec: 39332.2, 60 sec: 41506.2, 300 sec: 41709.8). Total num frames: 2743541760. Throughput: 0: 10432.0. Samples: 435881556. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:48,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:50,122][626795] Updated weights for policy 0, policy_version 334912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:52,097][626795] Updated weights for policy 0, policy_version 334922 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:53,976][24592] Fps is (10 sec: 39321.7, 60 sec: 41505.9, 300 sec: 41682.0). Total num frames: 2743754752. Throughput: 0: 10301.1. Samples: 435937566. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:53,978][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:54,111][626795] Updated weights for policy 0, policy_version 334932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:56,022][626795] Updated weights for policy 0, policy_version 334942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:57,877][626795] Updated weights for policy 0, policy_version 334952 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:58,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41369.5, 300 sec: 41682.0). Total num frames: 2743967744. Throughput: 0: 10282.2. Samples: 435968412. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:53:58,977][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:53:59,941][626795] Updated weights for policy 0, policy_version 334962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:01,692][626795] Updated weights for policy 0, policy_version 334972 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:03,605][626795] Updated weights for policy 0, policy_version 334982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:03,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41369.9, 300 sec: 41682.0). Total num frames: 2744180736. Throughput: 0: 10494.8. Samples: 436032966. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:03,976][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:05,527][626795] Updated weights for policy 0, policy_version 334992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:07,387][626795] Updated weights for policy 0, policy_version 335002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:08,975][24592] Fps is (10 sec: 43418.6, 60 sec: 41643.0, 300 sec: 41709.8). Total num frames: 2744401920. Throughput: 0: 10526.6. Samples: 436098534. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:08,976][24592] Avg episode reward: [(0, '4.887')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:09,309][626795] Updated weights for policy 0, policy_version 335012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:11,146][626795] Updated weights for policy 0, policy_version 335022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:13,100][626795] Updated weights for policy 0, policy_version 335032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:13,976][24592] Fps is (10 sec: 43415.7, 60 sec: 41778.9, 300 sec: 41709.8). Total num frames: 2744614912. Throughput: 0: 10552.7. Samples: 436130748. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:13,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:14,969][626795] Updated weights for policy 0, policy_version 335042 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:16,976][626795] Updated weights for policy 0, policy_version 335052 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:18,753][626795] Updated weights for policy 0, policy_version 335062 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:18,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42326.2, 300 sec: 41737.6). Total num frames: 2744836096. Throughput: 0: 10555.1. Samples: 436195134. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:18,979][24592] Avg episode reward: [(0, '4.467')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:20,665][626795] Updated weights for policy 0, policy_version 335072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:23,410][626795] Updated weights for policy 0, policy_version 335082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:23,976][24592] Fps is (10 sec: 39323.2, 60 sec: 41779.9, 300 sec: 41709.8). Total num frames: 2745008128. Throughput: 0: 10368.1. Samples: 436251180. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:23,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:25,499][626795] Updated weights for policy 0, policy_version 335092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:27,268][626795] Updated weights for policy 0, policy_version 335102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:28,976][24592] Fps is (10 sec: 38501.5, 60 sec: 41779.1, 300 sec: 41709.7). Total num frames: 2745221120. Throughput: 0: 10330.2. Samples: 436281966. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:28,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:29,377][626795] Updated weights for policy 0, policy_version 335112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:31,229][626795] Updated weights for policy 0, policy_version 335122 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:32,930][626795] Updated weights for policy 0, policy_version 335132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:33,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41779.2, 300 sec: 41682.0). Total num frames: 2745434112. Throughput: 0: 10344.3. Samples: 436347048. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:33,977][24592] Avg episode reward: [(0, '4.525')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:35,000][626795] Updated weights for policy 0, policy_version 335142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:36,767][626795] Updated weights for policy 0, policy_version 335152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:38,771][626795] Updated weights for policy 0, policy_version 335162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:38,977][24592] Fps is (10 sec: 43413.4, 60 sec: 41780.2, 300 sec: 41709.6). Total num frames: 2745655296. Throughput: 0: 10535.8. Samples: 436411686. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:38,978][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:40,570][626795] Updated weights for policy 0, policy_version 335172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:42,492][626795] Updated weights for policy 0, policy_version 335182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:43,976][24592] Fps is (10 sec: 44236.1, 60 sec: 41915.8, 300 sec: 41765.3). Total num frames: 2745876480. Throughput: 0: 10570.0. Samples: 436444062. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:43,976][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:44,385][626795] Updated weights for policy 0, policy_version 335192 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:46,350][626795] Updated weights for policy 0, policy_version 335202 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:48,091][626795] Updated weights for policy 0, policy_version 335212 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:48,975][24592] Fps is (10 sec: 43422.5, 60 sec: 42461.8, 300 sec: 41737.6). Total num frames: 2746089472. Throughput: 0: 10587.5. Samples: 436509402. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:48,977][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:50,069][626795] Updated weights for policy 0, policy_version 335222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:52,009][626795] Updated weights for policy 0, policy_version 335232 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:53,861][626795] Updated weights for policy 0, policy_version 335242 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:54,745][24592] Fps is (10 sec: 39556.0, 60 sec: 41924.5, 300 sec: 41739.8). Total num frames: 2746302464. Throughput: 0: 10398.2. Samples: 436574454. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:54,747][24592] Avg episode reward: [(0, '4.857')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:56,587][626795] Updated weights for policy 0, policy_version 335252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:54:58,601][626795] Updated weights for policy 0, policy_version 335262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:58,977][24592] Fps is (10 sec: 39316.2, 60 sec: 41914.9, 300 sec: 41765.1). Total num frames: 2746482688. Throughput: 0: 10363.0. Samples: 436597092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:54:58,979][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:00,500][626795] Updated weights for policy 0, policy_version 335272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:02,445][626795] Updated weights for policy 0, policy_version 335282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:03,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41915.8, 300 sec: 41767.8). Total num frames: 2746695680. Throughput: 0: 10347.6. Samples: 436660776. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:03,978][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000335290_2746695680.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000334063_2736644096.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:04,299][626795] Updated weights for policy 0, policy_version 335292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:06,229][626795] Updated weights for policy 0, policy_version 335302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:08,084][626795] Updated weights for policy 0, policy_version 335312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:08,975][24592] Fps is (10 sec: 42604.4, 60 sec: 41779.2, 300 sec: 41765.3). Total num frames: 2746908672. Throughput: 0: 10550.5. Samples: 436725954. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:08,976][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:10,057][626795] Updated weights for policy 0, policy_version 335322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:11,814][626795] Updated weights for policy 0, policy_version 335332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:13,625][626795] Updated weights for policy 0, policy_version 335342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:13,975][24592] Fps is (10 sec: 43417.8, 60 sec: 41916.1, 300 sec: 41765.3). Total num frames: 2747129856. Throughput: 0: 10590.1. Samples: 436758516. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:13,976][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:15,601][626795] Updated weights for policy 0, policy_version 335352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:17,435][626795] Updated weights for policy 0, policy_version 335362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:18,975][24592] Fps is (10 sec: 43417.5, 60 sec: 41779.1, 300 sec: 41793.1). Total num frames: 2747342848. Throughput: 0: 10615.2. Samples: 436824732. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:18,977][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:19,287][626795] Updated weights for policy 0, policy_version 335372 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:21,238][626795] Updated weights for policy 0, policy_version 335382 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:23,114][626795] Updated weights for policy 0, policy_version 335392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:23,975][24592] Fps is (10 sec: 43417.0, 60 sec: 42598.4, 300 sec: 41793.1). Total num frames: 2747564032. Throughput: 0: 10594.3. Samples: 436888416. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:23,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:25,046][626795] Updated weights for policy 0, policy_version 335402 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:26,943][626795] Updated weights for policy 0, policy_version 335412 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:28,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41915.8, 300 sec: 41793.1). Total num frames: 2747736064. Throughput: 0: 10599.5. Samples: 436921038. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:28,977][24592] Avg episode reward: [(0, '5.049')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:29,851][626795] Updated weights for policy 0, policy_version 335422 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:31,749][626795] Updated weights for policy 0, policy_version 335432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:33,666][626795] Updated weights for policy 0, policy_version 335442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:33,975][24592] Fps is (10 sec: 38502.9, 60 sec: 41915.7, 300 sec: 41793.1). Total num frames: 2747949056. Throughput: 0: 10344.0. Samples: 436974882. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:33,977][24592] Avg episode reward: [(0, '4.514')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:35,629][626795] Updated weights for policy 0, policy_version 335452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:37,466][626795] Updated weights for policy 0, policy_version 335462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:38,975][24592] Fps is (10 sec: 43418.2, 60 sec: 41916.6, 300 sec: 41820.9). Total num frames: 2748170240. Throughput: 0: 10530.7. Samples: 437040234. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:38,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:39,398][626795] Updated weights for policy 0, policy_version 335472 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:41,188][626795] Updated weights for policy 0, policy_version 335482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:42,954][626795] Updated weights for policy 0, policy_version 335492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:43,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41915.8, 300 sec: 41848.7). Total num frames: 2748391424. Throughput: 0: 10572.1. Samples: 437072820. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:43,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:44,957][626795] Updated weights for policy 0, policy_version 335502 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:46,767][626795] Updated weights for policy 0, policy_version 335512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:48,678][626795] Updated weights for policy 0, policy_version 335522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:48,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41915.8, 300 sec: 41876.4). Total num frames: 2748604416. Throughput: 0: 10606.1. Samples: 437138052. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:48,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:50,688][626795] Updated weights for policy 0, policy_version 335532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:52,570][626795] Updated weights for policy 0, policy_version 335542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,507][626772] Signal inference workers to stop experience collection... (5650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,507][626772] Signal inference workers to resume experience collection... (5650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,518][626795] InferenceWorker_p0-w0: stopping experience collection (5650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,524][626795] InferenceWorker_p0-w0: resuming experience collection (5650 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42460.1, 300 sec: 41876.4). Total num frames: 2748817408. Throughput: 0: 10603.5. Samples: 437203110. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:53,977][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:54,466][626795] Updated weights for policy 0, policy_version 335552 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:56,329][626795] Updated weights for policy 0, policy_version 335562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:55:58,219][626795] Updated weights for policy 0, policy_version 335572 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:58,975][24592] Fps is (10 sec: 43417.0, 60 sec: 42599.3, 300 sec: 42015.2). Total num frames: 2749038592. Throughput: 0: 10589.4. Samples: 437235042. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:55:58,977][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:01,046][626795] Updated weights for policy 0, policy_version 335582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:02,885][626795] Updated weights for policy 0, policy_version 335592 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:03,978][24592] Fps is (10 sec: 39313.1, 60 sec: 41914.2, 300 sec: 41876.1). Total num frames: 2749210624. Throughput: 0: 10341.2. Samples: 437290110. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:03,978][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:05,034][626795] Updated weights for policy 0, policy_version 335602 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:06,800][626795] Updated weights for policy 0, policy_version 335612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:08,700][626795] Updated weights for policy 0, policy_version 335622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:08,976][24592] Fps is (10 sec: 38499.3, 60 sec: 41915.1, 300 sec: 41876.3). Total num frames: 2749423616. Throughput: 0: 10352.7. Samples: 437354298. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:08,978][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:10,514][626795] Updated weights for policy 0, policy_version 335632 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:12,433][626795] Updated weights for policy 0, policy_version 335642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:13,975][24592] Fps is (10 sec: 43427.3, 60 sec: 41915.7, 300 sec: 41904.3). Total num frames: 2749644800. Throughput: 0: 10342.2. Samples: 437386434. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:13,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:14,336][626795] Updated weights for policy 0, policy_version 335652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:16,229][626795] Updated weights for policy 0, policy_version 335662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:18,105][626795] Updated weights for policy 0, policy_version 335672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:18,976][24592] Fps is (10 sec: 43420.5, 60 sec: 41915.6, 300 sec: 41931.9). Total num frames: 2749857792. Throughput: 0: 10598.2. Samples: 437451804. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:18,978][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:20,047][626795] Updated weights for policy 0, policy_version 335682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:21,841][626795] Updated weights for policy 0, policy_version 335692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:23,722][626795] Updated weights for policy 0, policy_version 335702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:23,976][24592] Fps is (10 sec: 42597.1, 60 sec: 41779.1, 300 sec: 41904.1). Total num frames: 2750070784. Throughput: 0: 10605.7. Samples: 437517492. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:23,976][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:25,642][626795] Updated weights for policy 0, policy_version 335712 (0.0040)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:27,610][626795] Updated weights for policy 0, policy_version 335722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:28,975][24592] Fps is (10 sec: 43418.9, 60 sec: 42598.5, 300 sec: 41904.3). Total num frames: 2750291968. Throughput: 0: 10588.7. Samples: 437549310. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:28,979][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:29,561][626795] Updated weights for policy 0, policy_version 335732 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:31,367][626795] Updated weights for policy 0, policy_version 335742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:33,975][24592] Fps is (10 sec: 40142.1, 60 sec: 42052.3, 300 sec: 41904.2). Total num frames: 2750472192. Throughput: 0: 10581.3. Samples: 437614212. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:33,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:34,127][626795] Updated weights for policy 0, policy_version 335752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:36,144][626795] Updated weights for policy 0, policy_version 335762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:38,071][626795] Updated weights for policy 0, policy_version 335772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:38,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2750685184. Throughput: 0: 10346.2. Samples: 437668686. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:38,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:39,835][626795] Updated weights for policy 0, policy_version 335782 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:41,752][626795] Updated weights for policy 0, policy_version 335792 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:43,662][626795] Updated weights for policy 0, policy_version 335802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:43,976][24592] Fps is (10 sec: 42597.7, 60 sec: 41779.1, 300 sec: 41904.1). Total num frames: 2750898176. Throughput: 0: 10352.1. Samples: 437700888. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:43,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:45,624][626795] Updated weights for policy 0, policy_version 335812 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:47,490][626795] Updated weights for policy 0, policy_version 335822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:48,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2751119360. Throughput: 0: 10585.2. Samples: 437766420. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:48,977][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:49,346][626795] Updated weights for policy 0, policy_version 335832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:51,254][626795] Updated weights for policy 0, policy_version 335842 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:53,082][626795] Updated weights for policy 0, policy_version 335852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:53,975][24592] Fps is (10 sec: 43418.2, 60 sec: 41915.8, 300 sec: 41932.0). Total num frames: 2751332352. Throughput: 0: 10586.9. Samples: 437830698. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:53,977][24592] Avg episode reward: [(0, '4.687')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:55,123][626795] Updated weights for policy 0, policy_version 335862 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:56,864][626795] Updated weights for policy 0, policy_version 335872 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:56:58,857][626795] Updated weights for policy 0, policy_version 335882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.3, 300 sec: 41931.9). Total num frames: 2751545344. Throughput: 0: 10599.1. Samples: 437863392. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:56:58,977][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:00,694][626795] Updated weights for policy 0, policy_version 335892 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:02,593][626795] Updated weights for policy 0, policy_version 335902 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:03,976][24592] Fps is (10 sec: 43416.7, 60 sec: 42599.8, 300 sec: 41959.7). Total num frames: 2751766528. Throughput: 0: 10591.5. Samples: 437928420. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:03,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000335909_2751766528.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:04,125][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000334680_2741698560.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:04,582][626795] Updated weights for policy 0, policy_version 335912 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:07,276][626795] Updated weights for policy 0, policy_version 335922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:08,976][24592] Fps is (10 sec: 39317.6, 60 sec: 41915.7, 300 sec: 41904.0). Total num frames: 2751938560. Throughput: 0: 10340.9. Samples: 437982840. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:08,979][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:09,311][626795] Updated weights for policy 0, policy_version 335932 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:11,271][626795] Updated weights for policy 0, policy_version 335942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:13,205][626795] Updated weights for policy 0, policy_version 335952 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:13,976][24592] Fps is (10 sec: 38502.8, 60 sec: 41779.1, 300 sec: 41904.2). Total num frames: 2752151552. Throughput: 0: 10327.7. Samples: 438014058. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:13,977][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:14,963][626795] Updated weights for policy 0, policy_version 335962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:16,922][626795] Updated weights for policy 0, policy_version 335972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:18,864][626795] Updated weights for policy 0, policy_version 335982 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:18,976][24592] Fps is (10 sec: 42601.5, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2752364544. Throughput: 0: 10327.9. Samples: 438078972. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:18,976][24592] Avg episode reward: [(0, '4.508')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:20,625][626795] Updated weights for policy 0, policy_version 335992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:22,530][626795] Updated weights for policy 0, policy_version 336002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:23,975][24592] Fps is (10 sec: 43418.2, 60 sec: 41916.0, 300 sec: 41905.0). Total num frames: 2752585728. Throughput: 0: 10590.3. Samples: 438145248. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:23,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:24,405][626795] Updated weights for policy 0, policy_version 336012 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:26,341][626795] Updated weights for policy 0, policy_version 336022 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:28,250][626795] Updated weights for policy 0, policy_version 336032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:28,976][24592] Fps is (10 sec: 43417.5, 60 sec: 41779.0, 300 sec: 41904.1). Total num frames: 2752798720. Throughput: 0: 10583.8. Samples: 438177162. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:28,977][24592] Avg episode reward: [(0, '4.496')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:30,204][626795] Updated weights for policy 0, policy_version 336042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:32,072][626795] Updated weights for policy 0, policy_version 336052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:33,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42325.2, 300 sec: 41876.4). Total num frames: 2753011712. Throughput: 0: 10548.6. Samples: 438241110. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:33,978][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:33,988][626795] Updated weights for policy 0, policy_version 336062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:35,937][626795] Updated weights for policy 0, policy_version 336072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:37,841][626795] Updated weights for policy 0, policy_version 336082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:38,975][24592] Fps is (10 sec: 43418.4, 60 sec: 42461.8, 300 sec: 42015.2). Total num frames: 2753232896. Throughput: 0: 10544.8. Samples: 438305214. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:38,976][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:40,466][626795] Updated weights for policy 0, policy_version 336092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:42,472][626795] Updated weights for policy 0, policy_version 336102 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:43,987][24592] Fps is (10 sec: 39278.1, 60 sec: 41771.5, 300 sec: 41874.8). Total num frames: 2753404928. Throughput: 0: 10337.3. Samples: 438328686. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:43,988][24592] Avg episode reward: [(0, '4.766')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:44,397][626795] Updated weights for policy 0, policy_version 336112 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:46,390][626795] Updated weights for policy 0, policy_version 336122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:48,126][626795] Updated weights for policy 0, policy_version 336132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:48,975][24592] Fps is (10 sec: 39322.0, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2753626112. Throughput: 0: 10323.9. Samples: 438392994. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:48,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:50,133][626795] Updated weights for policy 0, policy_version 336142 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:51,968][626795] Updated weights for policy 0, policy_version 336152 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:53,803][626795] Updated weights for policy 0, policy_version 336162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:53,976][24592] Fps is (10 sec: 44284.6, 60 sec: 41915.5, 300 sec: 41904.1). Total num frames: 2753847296. Throughput: 0: 10576.8. Samples: 438458790. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:53,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:55,789][626795] Updated weights for policy 0, policy_version 336172 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:57,604][626795] Updated weights for policy 0, policy_version 336182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:58,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2754060288. Throughput: 0: 10589.6. Samples: 438490590. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:57:58,976][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:57:59,609][626795] Updated weights for policy 0, policy_version 336192 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:01,489][626795] Updated weights for policy 0, policy_version 336202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:03,357][626795] Updated weights for policy 0, policy_version 336212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:03,976][24592] Fps is (10 sec: 42599.0, 60 sec: 41779.2, 300 sec: 41932.0). Total num frames: 2754273280. Throughput: 0: 10568.7. Samples: 438554562. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:03,977][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:05,375][626795] Updated weights for policy 0, policy_version 336222 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:07,260][626795] Updated weights for policy 0, policy_version 336232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42462.6, 300 sec: 41959.7). Total num frames: 2754486272. Throughput: 0: 10531.1. Samples: 438619146. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:08,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:09,212][626795] Updated weights for policy 0, policy_version 336242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:11,043][626795] Updated weights for policy 0, policy_version 336252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:13,737][626795] Updated weights for policy 0, policy_version 336262 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:13,975][24592] Fps is (10 sec: 39322.3, 60 sec: 41915.8, 300 sec: 41932.1). Total num frames: 2754666496. Throughput: 0: 10524.3. Samples: 438650754. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:13,979][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:15,761][626795] Updated weights for policy 0, policy_version 336272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:17,628][626795] Updated weights for policy 0, policy_version 336282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:18,975][24592] Fps is (10 sec: 38502.5, 60 sec: 41779.4, 300 sec: 41932.1). Total num frames: 2754871296. Throughput: 0: 10340.4. Samples: 438706428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:18,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:19,599][626795] Updated weights for policy 0, policy_version 336292 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:21,343][626795] Updated weights for policy 0, policy_version 336302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:23,295][626795] Updated weights for policy 0, policy_version 336312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:23,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41779.1, 300 sec: 41959.7). Total num frames: 2755092480. Throughput: 0: 10363.1. Samples: 438771552. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:23,977][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:25,122][626795] Updated weights for policy 0, policy_version 336322 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:26,992][626795] Updated weights for policy 0, policy_version 336332 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:28,920][626795] Updated weights for policy 0, policy_version 336342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:28,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41916.0, 300 sec: 41987.5). Total num frames: 2755313664. Throughput: 0: 10570.4. Samples: 438804234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:28,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:30,838][626795] Updated weights for policy 0, policy_version 336352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:32,738][626795] Updated weights for policy 0, policy_version 336362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:33,976][24592] Fps is (10 sec: 43416.0, 60 sec: 41915.5, 300 sec: 41960.0). Total num frames: 2755526656. Throughput: 0: 10591.1. Samples: 438869598. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:33,977][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:34,731][626795] Updated weights for policy 0, policy_version 336372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:36,540][626795] Updated weights for policy 0, policy_version 336382 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:38,429][626795] Updated weights for policy 0, policy_version 336392 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:38,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41779.3, 300 sec: 41959.7). Total num frames: 2755739648. Throughput: 0: 10552.4. Samples: 438933642. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:38,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:40,432][626795] Updated weights for policy 0, policy_version 336402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:42,301][626795] Updated weights for policy 0, policy_version 336412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:43,975][24592] Fps is (10 sec: 43419.6, 60 sec: 42606.4, 300 sec: 42098.5). Total num frames: 2755960832. Throughput: 0: 10552.0. Samples: 438965430. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:43,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:44,241][626795] Updated weights for policy 0, policy_version 336422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:46,846][626795] Updated weights for policy 0, policy_version 336432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:48,812][626795] Updated weights for policy 0, policy_version 336442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:48,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2756132864. Throughput: 0: 10359.4. Samples: 439020732. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:48,977][24592] Avg episode reward: [(0, '4.606')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:50,712][626795] Updated weights for policy 0, policy_version 336452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:52,690][626795] Updated weights for policy 0, policy_version 336462 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:53,975][24592] Fps is (10 sec: 39322.1, 60 sec: 41779.6, 300 sec: 41987.5). Total num frames: 2756354048. Throughput: 0: 10374.2. Samples: 439085982. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:53,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:54,504][626795] Updated weights for policy 0, policy_version 336472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:56,414][626795] Updated weights for policy 0, policy_version 336482 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:58:58,309][626795] Updated weights for policy 0, policy_version 336492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:58,975][24592] Fps is (10 sec: 43417.8, 60 sec: 41779.2, 300 sec: 41987.5). Total num frames: 2756567040. Throughput: 0: 10383.0. Samples: 439117986. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:58:58,977][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:00,251][626795] Updated weights for policy 0, policy_version 336502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:02,043][626795] Updated weights for policy 0, policy_version 336512 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:03,933][626795] Updated weights for policy 0, policy_version 336522 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:03,976][24592] Fps is (10 sec: 43414.8, 60 sec: 41915.6, 300 sec: 41987.4). Total num frames: 2756788224. Throughput: 0: 10597.5. Samples: 439183320. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:03,977][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000336522_2756788224.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:04,058][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000335290_2746695680.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:05,958][626795] Updated weights for policy 0, policy_version 336532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:07,776][626795] Updated weights for policy 0, policy_version 336542 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:08,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41779.2, 300 sec: 41959.8). Total num frames: 2756993024. Throughput: 0: 10578.8. Samples: 439247598. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:08,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:09,715][626795] Updated weights for policy 0, policy_version 336552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:11,712][626795] Updated weights for policy 0, policy_version 336562 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:13,528][626795] Updated weights for policy 0, policy_version 336572 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:13,975][24592] Fps is (10 sec: 42600.8, 60 sec: 42462.0, 300 sec: 41959.7). Total num frames: 2757214208. Throughput: 0: 10571.5. Samples: 439279950. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:13,976][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:15,423][626795] Updated weights for policy 0, policy_version 336582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:17,359][626795] Updated weights for policy 0, policy_version 336592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:18,975][24592] Fps is (10 sec: 40140.9, 60 sec: 42052.2, 300 sec: 41987.5). Total num frames: 2757394432. Throughput: 0: 10548.5. Samples: 439344276. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:18,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:20,094][626795] Updated weights for policy 0, policy_version 336602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:22,072][626795] Updated weights for policy 0, policy_version 336612 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:23,869][626795] Updated weights for policy 0, policy_version 336622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:23,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41915.8, 300 sec: 41987.5). Total num frames: 2757607424. Throughput: 0: 10364.9. Samples: 439400064. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:23,977][24592] Avg episode reward: [(0, '4.612')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:25,819][626795] Updated weights for policy 0, policy_version 336632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:27,633][626795] Updated weights for policy 0, policy_version 336642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:28,976][24592] Fps is (10 sec: 43416.6, 60 sec: 41915.5, 300 sec: 42015.2). Total num frames: 2757828608. Throughput: 0: 10379.0. Samples: 439432488. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:28,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:29,582][626795] Updated weights for policy 0, policy_version 336652 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:31,320][626795] Updated weights for policy 0, policy_version 336662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:33,297][626795] Updated weights for policy 0, policy_version 336672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:33,976][24592] Fps is (10 sec: 43415.4, 60 sec: 41915.7, 300 sec: 41987.6). Total num frames: 2758041600. Throughput: 0: 10603.1. Samples: 439497876. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:33,977][24592] Avg episode reward: [(0, '4.442')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:35,155][626795] Updated weights for policy 0, policy_version 336682 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:37,092][626795] Updated weights for policy 0, policy_version 336692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:38,947][626795] Updated weights for policy 0, policy_version 336702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:38,975][24592] Fps is (10 sec: 43418.5, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 2758262784. Throughput: 0: 10607.2. Samples: 439563306. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:38,977][24592] Avg episode reward: [(0, '4.865')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:40,826][626795] Updated weights for policy 0, policy_version 336712 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:42,777][626795] Updated weights for policy 0, policy_version 336722 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:43,976][24592] Fps is (10 sec: 43418.0, 60 sec: 41915.4, 300 sec: 41987.4). Total num frames: 2758475776. Throughput: 0: 10616.0. Samples: 439595712. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:43,977][24592] Avg episode reward: [(0, '4.440')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:44,723][626795] Updated weights for policy 0, policy_version 336732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:46,505][626795] Updated weights for policy 0, policy_version 336742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:48,386][626795] Updated weights for policy 0, policy_version 336752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:48,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.4, 300 sec: 42097.3). Total num frames: 2758688768. Throughput: 0: 10579.1. Samples: 439659372. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:48,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:50,481][626795] Updated weights for policy 0, policy_version 336762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:53,171][626795] Updated weights for policy 0, policy_version 336772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:53,976][24592] Fps is (10 sec: 39322.5, 60 sec: 41915.5, 300 sec: 41987.6). Total num frames: 2758868992. Throughput: 0: 10374.4. Samples: 439714446. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:53,979][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:55,106][626795] Updated weights for policy 0, policy_version 336782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:57,132][626795] Updated weights for policy 0, policy_version 336792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 19:59:58,935][626795] Updated weights for policy 0, policy_version 336802 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:58,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2759081984. Throughput: 0: 10354.3. Samples: 439745892. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 19:59:58,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:00,785][626795] Updated weights for policy 0, policy_version 336812 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:02,685][626795] Updated weights for policy 0, policy_version 336822 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:03,977][24592] Fps is (10 sec: 43412.1, 60 sec: 41915.0, 300 sec: 42015.0). Total num frames: 2759303168. Throughput: 0: 10392.5. Samples: 439811952. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:03,978][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:04,600][626795] Updated weights for policy 0, policy_version 336832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:06,443][626795] Updated weights for policy 0, policy_version 336842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:08,313][626795] Updated weights for policy 0, policy_version 336852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:08,976][24592] Fps is (10 sec: 43416.4, 60 sec: 42052.1, 300 sec: 41987.4). Total num frames: 2759516160. Throughput: 0: 10621.1. Samples: 439878018. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:08,977][24592] Avg episode reward: [(0, '5.111')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:10,241][626795] Updated weights for policy 0, policy_version 336862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:12,026][626795] Updated weights for policy 0, policy_version 336872 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:13,764][626795] Updated weights for policy 0, policy_version 336882 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:13,975][24592] Fps is (10 sec: 43424.1, 60 sec: 42052.2, 300 sec: 42015.2). Total num frames: 2759737344. Throughput: 0: 10638.5. Samples: 439911216. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:13,976][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:15,783][626795] Updated weights for policy 0, policy_version 336892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:17,692][626795] Updated weights for policy 0, policy_version 336902 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:18,975][24592] Fps is (10 sec: 44237.7, 60 sec: 42734.9, 300 sec: 42015.2). Total num frames: 2759958528. Throughput: 0: 10631.2. Samples: 439976274. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:18,976][24592] Avg episode reward: [(0, '4.873')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:19,563][626795] Updated weights for policy 0, policy_version 336912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:21,424][626795] Updated weights for policy 0, policy_version 336922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:23,368][626795] Updated weights for policy 0, policy_version 336932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:23,976][24592] Fps is (10 sec: 42597.1, 60 sec: 42598.2, 300 sec: 42126.3). Total num frames: 2760163328. Throughput: 0: 10597.3. Samples: 440040186. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:23,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:26,221][626795] Updated weights for policy 0, policy_version 336942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:28,224][626795] Updated weights for policy 0, policy_version 336952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:28,975][24592] Fps is (10 sec: 37683.5, 60 sec: 41779.4, 300 sec: 41987.5). Total num frames: 2760335360. Throughput: 0: 10361.7. Samples: 440061984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:28,976][24592] Avg episode reward: [(0, '5.129')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:30,036][626795] Updated weights for policy 0, policy_version 336962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:31,958][626795] Updated weights for policy 0, policy_version 336972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:33,777][626795] Updated weights for policy 0, policy_version 336982 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:33,975][24592] Fps is (10 sec: 40141.9, 60 sec: 42052.6, 300 sec: 42015.2). Total num frames: 2760564736. Throughput: 0: 10403.6. Samples: 440127534. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:33,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:35,543][626795] Updated weights for policy 0, policy_version 336992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:37,404][626795] Updated weights for policy 0, policy_version 337002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:38,975][24592] Fps is (10 sec: 45056.0, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2760785920. Throughput: 0: 10672.7. Samples: 440194716. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:38,977][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:39,283][626795] Updated weights for policy 0, policy_version 337012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:41,200][626795] Updated weights for policy 0, policy_version 337022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:43,103][626795] Updated weights for policy 0, policy_version 337032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:43,976][24592] Fps is (10 sec: 43416.0, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2760998912. Throughput: 0: 10681.9. Samples: 440226582. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:43,978][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:45,073][626795] Updated weights for policy 0, policy_version 337042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:46,902][626795] Updated weights for policy 0, policy_version 337052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:48,873][626795] Updated weights for policy 0, policy_version 337062 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:48,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 42015.3). Total num frames: 2761211904. Throughput: 0: 10649.8. Samples: 440291178. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:48,976][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:50,841][626795] Updated weights for policy 0, policy_version 337072 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:52,730][626795] Updated weights for policy 0, policy_version 337082 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:53,975][24592] Fps is (10 sec: 42600.1, 60 sec: 42598.6, 300 sec: 41987.5). Total num frames: 2761424896. Throughput: 0: 10579.3. Samples: 440354082. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:53,976][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:54,774][626795] Updated weights for policy 0, policy_version 337092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:56,640][626795] Updated weights for policy 0, policy_version 337102 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:58,975][24592] Fps is (10 sec: 39321.4, 60 sec: 42052.2, 300 sec: 42015.6). Total num frames: 2761605120. Throughput: 0: 10540.1. Samples: 440385522. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:00:58,977][24592] Avg episode reward: [(0, '4.295')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:00:59,478][626795] Updated weights for policy 0, policy_version 337112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:01,398][626795] Updated weights for policy 0, policy_version 337122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:03,320][626795] Updated weights for policy 0, policy_version 337132 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:03,976][24592] Fps is (10 sec: 38500.9, 60 sec: 41780.0, 300 sec: 41987.5). Total num frames: 2761809920. Throughput: 0: 10308.3. Samples: 440440152. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:03,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000337135_2761809920.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:04,104][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000335909_2751766528.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:05,300][626795] Updated weights for policy 0, policy_version 337142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:07,108][626795] Updated weights for policy 0, policy_version 337152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:08,838][626795] Updated weights for policy 0, policy_version 337162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:08,976][24592] Fps is (10 sec: 42596.1, 60 sec: 41915.5, 300 sec: 41987.4). Total num frames: 2762031104. Throughput: 0: 10333.3. Samples: 440505186. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:08,977][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:10,846][626795] Updated weights for policy 0, policy_version 337172 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:12,708][626795] Updated weights for policy 0, policy_version 337182 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:13,975][24592] Fps is (10 sec: 43419.2, 60 sec: 41779.2, 300 sec: 41987.5). Total num frames: 2762244096. Throughput: 0: 10582.8. Samples: 440538210. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:13,976][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:14,577][626795] Updated weights for policy 0, policy_version 337192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:16,464][626795] Updated weights for policy 0, policy_version 337202 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:18,378][626795] Updated weights for policy 0, policy_version 337212 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:18,975][24592] Fps is (10 sec: 43420.0, 60 sec: 41779.2, 300 sec: 42015.3). Total num frames: 2762465280. Throughput: 0: 10562.4. Samples: 440602842. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:18,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:20,273][626795] Updated weights for policy 0, policy_version 337222 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:22,176][626795] Updated weights for policy 0, policy_version 337232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:23,975][24592] Fps is (10 sec: 43417.1, 60 sec: 41915.9, 300 sec: 41987.4). Total num frames: 2762678272. Throughput: 0: 10515.7. Samples: 440667924. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:23,977][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:24,016][626795] Updated weights for policy 0, policy_version 337242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:25,987][626795] Updated weights for policy 0, policy_version 337252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:27,858][626795] Updated weights for policy 0, policy_version 337262 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:28,976][24592] Fps is (10 sec: 42597.7, 60 sec: 42598.3, 300 sec: 42098.5). Total num frames: 2762891264. Throughput: 0: 10509.7. Samples: 440699514. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:28,978][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:29,815][626795] Updated weights for policy 0, policy_version 337272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:32,559][626795] Updated weights for policy 0, policy_version 337282 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:33,975][24592] Fps is (10 sec: 39321.7, 60 sec: 41779.2, 300 sec: 41987.5). Total num frames: 2763071488. Throughput: 0: 10310.9. Samples: 440755170. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:33,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:34,559][626795] Updated weights for policy 0, policy_version 337292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:36,318][626795] Updated weights for policy 0, policy_version 337302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:38,192][626795] Updated weights for policy 0, policy_version 337312 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:38,975][24592] Fps is (10 sec: 40141.1, 60 sec: 41779.1, 300 sec: 42015.2). Total num frames: 2763292672. Throughput: 0: 10374.1. Samples: 440820918. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:38,977][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:39,972][626795] Updated weights for policy 0, policy_version 337322 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:41,919][626795] Updated weights for policy 0, policy_version 337332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:43,666][626795] Updated weights for policy 0, policy_version 337342 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:43,975][24592] Fps is (10 sec: 43418.0, 60 sec: 41779.5, 300 sec: 41987.5). Total num frames: 2763505664. Throughput: 0: 10421.6. Samples: 440854494. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:43,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:45,658][626795] Updated weights for policy 0, policy_version 337352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:47,575][626795] Updated weights for policy 0, policy_version 337362 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:48,975][24592] Fps is (10 sec: 43418.2, 60 sec: 41915.7, 300 sec: 42015.2). Total num frames: 2763726848. Throughput: 0: 10647.3. Samples: 440919276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:48,977][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:49,490][626795] Updated weights for policy 0, policy_version 337372 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:51,347][626795] Updated weights for policy 0, policy_version 337382 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:53,365][626795] Updated weights for policy 0, policy_version 337392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:53,976][24592] Fps is (10 sec: 43416.8, 60 sec: 41915.6, 300 sec: 42015.2). Total num frames: 2763939840. Throughput: 0: 10627.0. Samples: 440983398. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:53,977][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:55,259][626795] Updated weights for policy 0, policy_version 337402 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:57,264][626795] Updated weights for policy 0, policy_version 337412 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:58,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42461.9, 300 sec: 41987.5). Total num frames: 2764152832. Throughput: 0: 10563.6. Samples: 441013572. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:01:58,976][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:01:59,213][626795] Updated weights for policy 0, policy_version 337422 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:01,033][626795] Updated weights for policy 0, policy_version 337432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:03,042][626795] Updated weights for policy 0, policy_version 337442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:03,976][24592] Fps is (10 sec: 39321.7, 60 sec: 42052.4, 300 sec: 42015.4). Total num frames: 2764333056. Throughput: 0: 10544.9. Samples: 441077364. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:03,978][24592] Avg episode reward: [(0, '4.334')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:05,870][626795] Updated weights for policy 0, policy_version 337452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:07,712][626795] Updated weights for policy 0, policy_version 337462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:08,976][24592] Fps is (10 sec: 38500.8, 60 sec: 41779.3, 300 sec: 41987.4). Total num frames: 2764537856. Throughput: 0: 10352.5. Samples: 441133788. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:08,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:09,552][626795] Updated weights for policy 0, policy_version 337472 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:11,358][626795] Updated weights for policy 0, policy_version 337482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:13,023][626795] Updated weights for policy 0, policy_version 337492 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:13,976][24592] Fps is (10 sec: 43417.7, 60 sec: 42052.2, 300 sec: 42043.0). Total num frames: 2764767232. Throughput: 0: 10403.3. Samples: 441167664. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:13,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:14,924][626795] Updated weights for policy 0, policy_version 337502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:16,830][626795] Updated weights for policy 0, policy_version 337512 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:18,763][626795] Updated weights for policy 0, policy_version 337522 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:18,976][24592] Fps is (10 sec: 45057.4, 60 sec: 42052.2, 300 sec: 42043.0). Total num frames: 2764988416. Throughput: 0: 10634.1. Samples: 441233706. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:18,976][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:20,606][626795] Updated weights for policy 0, policy_version 337532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:22,544][626795] Updated weights for policy 0, policy_version 337542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:23,975][24592] Fps is (10 sec: 43418.1, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2765201408. Throughput: 0: 10615.5. Samples: 441298614. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:23,978][24592] Avg episode reward: [(0, '4.472')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:24,476][626795] Updated weights for policy 0, policy_version 337552 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:26,409][626795] Updated weights for policy 0, policy_version 337562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:28,272][626795] Updated weights for policy 0, policy_version 337572 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:28,976][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2765414400. Throughput: 0: 10580.2. Samples: 441330606. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:28,977][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:30,391][626795] Updated weights for policy 0, policy_version 337582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:32,206][626795] Updated weights for policy 0, policy_version 337592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:33,976][24592] Fps is (10 sec: 42597.3, 60 sec: 42598.3, 300 sec: 42015.2). Total num frames: 2765627392. Throughput: 0: 10536.3. Samples: 441393414. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:33,977][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:34,270][626795] Updated weights for policy 0, policy_version 337602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:36,923][626795] Updated weights for policy 0, policy_version 337612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:38,855][626795] Updated weights for policy 0, policy_version 337622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:38,975][24592] Fps is (10 sec: 39322.5, 60 sec: 41915.8, 300 sec: 42044.6). Total num frames: 2765807616. Throughput: 0: 10335.9. Samples: 441448512. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:38,976][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:40,785][626795] Updated weights for policy 0, policy_version 337632 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:42,606][626795] Updated weights for policy 0, policy_version 337642 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:43,975][24592] Fps is (10 sec: 40142.1, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2766028800. Throughput: 0: 10375.9. Samples: 441480486. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:43,976][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:44,397][626795] Updated weights for policy 0, policy_version 337652 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:46,067][626795] Updated weights for policy 0, policy_version 337662 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:48,032][626795] Updated weights for policy 0, policy_version 337672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:48,975][24592] Fps is (10 sec: 44236.3, 60 sec: 42052.2, 300 sec: 42043.1). Total num frames: 2766249984. Throughput: 0: 10482.6. Samples: 441549078. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:48,977][24592] Avg episode reward: [(0, '4.594')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:49,904][626795] Updated weights for policy 0, policy_version 337682 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:51,810][626795] Updated weights for policy 0, policy_version 337692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:53,798][626795] Updated weights for policy 0, policy_version 337702 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:53,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42052.4, 300 sec: 42043.0). Total num frames: 2766462976. Throughput: 0: 10662.6. Samples: 441613602. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:53,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:55,759][626795] Updated weights for policy 0, policy_version 337712 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:56,023][626772] Signal inference workers to stop experience collection... (5700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:56,037][626772] Signal inference workers to resume experience collection... (5700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:56,042][626795] InferenceWorker_p0-w0: stopping experience collection (5700 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:56,042][626795] InferenceWorker_p0-w0: resuming experience collection (5700 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:57,635][626795] Updated weights for policy 0, policy_version 337722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:58,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2766675968. Throughput: 0: 10604.3. Samples: 441644856. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:02:58,976][24592] Avg episode reward: [(0, '5.018')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:02:59,609][626795] Updated weights for policy 0, policy_version 337732 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:01,563][626795] Updated weights for policy 0, policy_version 337742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:03,402][626795] Updated weights for policy 0, policy_version 337752 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:03,976][24592] Fps is (10 sec: 41774.8, 60 sec: 42461.2, 300 sec: 42015.1). Total num frames: 2766880768. Throughput: 0: 10531.4. Samples: 441707628. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:03,979][24592] Avg episode reward: [(0, '4.851')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000337754_2766880768.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:04,107][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000336522_2756788224.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:05,511][626795] Updated weights for policy 0, policy_version 337762 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:07,387][626795] Updated weights for policy 0, policy_version 337772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:08,976][24592] Fps is (10 sec: 41777.8, 60 sec: 42598.5, 300 sec: 42126.3). Total num frames: 2767093760. Throughput: 0: 10494.3. Samples: 441770862. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:08,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:10,060][626795] Updated weights for policy 0, policy_version 337782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:11,953][626795] Updated weights for policy 0, policy_version 337792 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:13,883][626795] Updated weights for policy 0, policy_version 337802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:13,975][24592] Fps is (10 sec: 39325.9, 60 sec: 41779.3, 300 sec: 42043.0). Total num frames: 2767273984. Throughput: 0: 10325.6. Samples: 441795258. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:13,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:15,671][626795] Updated weights for policy 0, policy_version 337812 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:17,361][626795] Updated weights for policy 0, policy_version 337822 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:18,976][24592] Fps is (10 sec: 40958.9, 60 sec: 41915.4, 300 sec: 42070.7). Total num frames: 2767503360. Throughput: 0: 10438.7. Samples: 441863160. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:18,978][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:19,232][626795] Updated weights for policy 0, policy_version 337832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:21,160][626795] Updated weights for policy 0, policy_version 337842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:22,949][626795] Updated weights for policy 0, policy_version 337852 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:23,975][24592] Fps is (10 sec: 44236.4, 60 sec: 41915.7, 300 sec: 42043.0). Total num frames: 2767716352. Throughput: 0: 10657.8. Samples: 441928116. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:23,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:24,934][626795] Updated weights for policy 0, policy_version 337862 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:26,996][626795] Updated weights for policy 0, policy_version 337872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:28,833][626795] Updated weights for policy 0, policy_version 337882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:28,976][24592] Fps is (10 sec: 42599.3, 60 sec: 41915.6, 300 sec: 42043.0). Total num frames: 2767929344. Throughput: 0: 10636.8. Samples: 441959148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:28,977][24592] Avg episode reward: [(0, '4.898')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:30,805][626795] Updated weights for policy 0, policy_version 337892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:32,697][626795] Updated weights for policy 0, policy_version 337902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:33,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41915.9, 300 sec: 42043.0). Total num frames: 2768142336. Throughput: 0: 10530.7. Samples: 442022958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:33,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:34,754][626795] Updated weights for policy 0, policy_version 337912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:36,624][626795] Updated weights for policy 0, policy_version 337922 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:38,436][626795] Updated weights for policy 0, policy_version 337932 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:38,976][24592] Fps is (10 sec: 42598.4, 60 sec: 42461.6, 300 sec: 42015.2). Total num frames: 2768355328. Throughput: 0: 10537.1. Samples: 442087776. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:38,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:40,590][626795] Updated weights for policy 0, policy_version 337942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:43,036][626795] Updated weights for policy 0, policy_version 337952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:43,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41779.2, 300 sec: 42043.0). Total num frames: 2768535552. Throughput: 0: 10490.5. Samples: 442116930. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:43,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:45,015][626795] Updated weights for policy 0, policy_version 337962 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:46,762][626795] Updated weights for policy 0, policy_version 337972 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:48,632][626795] Updated weights for policy 0, policy_version 337982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:48,975][24592] Fps is (10 sec: 40961.4, 60 sec: 41915.7, 300 sec: 42070.8). Total num frames: 2768764928. Throughput: 0: 10408.0. Samples: 442175976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:48,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:50,351][626795] Updated weights for policy 0, policy_version 337992 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:52,103][626795] Updated weights for policy 0, policy_version 338002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:53,975][24592] Fps is (10 sec: 45056.0, 60 sec: 42052.3, 300 sec: 42098.5). Total num frames: 2768986112. Throughput: 0: 10544.9. Samples: 442245378. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:53,976][24592] Avg episode reward: [(0, '4.641')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:54,049][626795] Updated weights for policy 0, policy_version 338012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:56,015][626795] Updated weights for policy 0, policy_version 338022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:03:58,017][626795] Updated weights for policy 0, policy_version 338032 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:58,976][24592] Fps is (10 sec: 42598.2, 60 sec: 41915.7, 300 sec: 42043.1). Total num frames: 2769190912. Throughput: 0: 10673.3. Samples: 442275558. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:03:58,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:00,004][626795] Updated weights for policy 0, policy_version 338042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:01,964][626795] Updated weights for policy 0, policy_version 338052 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:03,767][626795] Updated weights for policy 0, policy_version 338062 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:03,976][24592] Fps is (10 sec: 41778.5, 60 sec: 42052.9, 300 sec: 42070.8). Total num frames: 2769403904. Throughput: 0: 10587.3. Samples: 442339584. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:03,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:05,871][626795] Updated weights for policy 0, policy_version 338072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:07,798][626795] Updated weights for policy 0, policy_version 338082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:08,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42052.6, 300 sec: 42043.0). Total num frames: 2769616896. Throughput: 0: 10540.8. Samples: 442402452. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:08,976][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:09,600][626795] Updated weights for policy 0, policy_version 338092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:11,471][626795] Updated weights for policy 0, policy_version 338102 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:13,443][626795] Updated weights for policy 0, policy_version 338112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:13,976][24592] Fps is (10 sec: 42598.5, 60 sec: 42598.3, 300 sec: 42154.1). Total num frames: 2769829888. Throughput: 0: 10573.9. Samples: 442434972. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:13,976][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:16,088][626795] Updated weights for policy 0, policy_version 338122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:18,017][626795] Updated weights for policy 0, policy_version 338132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:18,975][24592] Fps is (10 sec: 40140.5, 60 sec: 41916.1, 300 sec: 42070.8). Total num frames: 2770018304. Throughput: 0: 10405.3. Samples: 442491198. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:18,977][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:20,009][626795] Updated weights for policy 0, policy_version 338142 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:21,807][626795] Updated weights for policy 0, policy_version 338152 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:23,702][626795] Updated weights for policy 0, policy_version 338162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:23,976][24592] Fps is (10 sec: 40139.6, 60 sec: 41915.5, 300 sec: 42043.0). Total num frames: 2770231296. Throughput: 0: 10402.1. Samples: 442555872. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:23,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:25,650][626795] Updated weights for policy 0, policy_version 338172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:27,558][626795] Updated weights for policy 0, policy_version 338182 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:28,988][24592] Fps is (10 sec: 42544.1, 60 sec: 41907.1, 300 sec: 42041.3). Total num frames: 2770444288. Throughput: 0: 10455.3. Samples: 442587552. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:28,989][24592] Avg episode reward: [(0, '4.400')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:29,494][626795] Updated weights for policy 0, policy_version 338192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:31,401][626795] Updated weights for policy 0, policy_version 338202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:33,264][626795] Updated weights for policy 0, policy_version 338212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:33,975][24592] Fps is (10 sec: 42600.3, 60 sec: 41915.7, 300 sec: 42015.2). Total num frames: 2770657280. Throughput: 0: 10584.5. Samples: 442652280. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:33,976][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:35,167][626795] Updated weights for policy 0, policy_version 338222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:37,117][626795] Updated weights for policy 0, policy_version 338232 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:38,975][24592] Fps is (10 sec: 42652.7, 60 sec: 41916.0, 300 sec: 42015.3). Total num frames: 2770870272. Throughput: 0: 10464.4. Samples: 442716276. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:38,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:39,057][626795] Updated weights for policy 0, policy_version 338242 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:40,993][626795] Updated weights for policy 0, policy_version 338252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:42,854][626795] Updated weights for policy 0, policy_version 338262 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:43,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42461.8, 300 sec: 42015.2). Total num frames: 2771083264. Throughput: 0: 10494.0. Samples: 442747788. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:43,976][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:44,782][626795] Updated weights for policy 0, policy_version 338272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:46,791][626795] Updated weights for policy 0, policy_version 338282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:48,976][24592] Fps is (10 sec: 40139.6, 60 sec: 41779.0, 300 sec: 42043.0). Total num frames: 2771271680. Throughput: 0: 10486.5. Samples: 442811478. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:48,977][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:49,218][626795] Updated weights for policy 0, policy_version 338292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:51,183][626795] Updated weights for policy 0, policy_version 338302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:53,086][626795] Updated weights for policy 0, policy_version 338312 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:53,976][24592] Fps is (10 sec: 40140.3, 60 sec: 41642.5, 300 sec: 42043.0). Total num frames: 2771484672. Throughput: 0: 10379.4. Samples: 442869528. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:53,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:54,973][626795] Updated weights for policy 0, policy_version 338322 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:56,991][626795] Updated weights for policy 0, policy_version 338332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:04:58,855][626795] Updated weights for policy 0, policy_version 338342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:58,975][24592] Fps is (10 sec: 42600.2, 60 sec: 41779.3, 300 sec: 42015.5). Total num frames: 2771697664. Throughput: 0: 10359.1. Samples: 442901130. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:04:58,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:00,837][626795] Updated weights for policy 0, policy_version 338352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:02,838][626795] Updated weights for policy 0, policy_version 338362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:03,976][24592] Fps is (10 sec: 41779.0, 60 sec: 41642.6, 300 sec: 41987.5). Total num frames: 2771902464. Throughput: 0: 10507.4. Samples: 442964034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:03,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000338367_2771902464.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:04,129][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000337135_2761809920.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:04,973][626795] Updated weights for policy 0, policy_version 338372 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:06,894][626795] Updated weights for policy 0, policy_version 338382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:08,912][626795] Updated weights for policy 0, policy_version 338392 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:08,976][24592] Fps is (10 sec: 40959.2, 60 sec: 41506.0, 300 sec: 41931.9). Total num frames: 2772107264. Throughput: 0: 10434.6. Samples: 443025426. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:08,976][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:10,840][626795] Updated weights for policy 0, policy_version 338402 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:12,864][626795] Updated weights for policy 0, policy_version 338412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:13,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41506.2, 300 sec: 41904.2). Total num frames: 2772320256. Throughput: 0: 10405.5. Samples: 443055666. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:13,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:14,735][626795] Updated weights for policy 0, policy_version 338422 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:16,697][626795] Updated weights for policy 0, policy_version 338432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:18,640][626795] Updated weights for policy 0, policy_version 338442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:18,976][24592] Fps is (10 sec: 40960.0, 60 sec: 41642.6, 300 sec: 41876.4). Total num frames: 2772516864. Throughput: 0: 10377.6. Samples: 443119272. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:18,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:20,682][626795] Updated weights for policy 0, policy_version 338452 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:22,565][626795] Updated weights for policy 0, policy_version 338462 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:23,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41779.5, 300 sec: 42043.0). Total num frames: 2772738048. Throughput: 0: 10362.8. Samples: 443182602. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:23,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:24,607][626795] Updated weights for policy 0, policy_version 338472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:26,234][626795] Updated weights for policy 0, policy_version 338482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:27,984][626795] Updated weights for policy 0, policy_version 338492 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:28,988][24592] Fps is (10 sec: 44179.9, 60 sec: 41915.6, 300 sec: 42013.4). Total num frames: 2772959232. Throughput: 0: 10406.9. Samples: 443216232. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:28,989][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:29,822][626795] Updated weights for policy 0, policy_version 338502 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:31,641][626795] Updated weights for policy 0, policy_version 338512 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:33,509][626795] Updated weights for policy 0, policy_version 338522 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:33,975][24592] Fps is (10 sec: 44236.6, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2773180416. Throughput: 0: 10495.8. Samples: 443283786. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:33,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:35,547][626795] Updated weights for policy 0, policy_version 338532 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:37,470][626795] Updated weights for policy 0, policy_version 338542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:38,975][24592] Fps is (10 sec: 44294.6, 60 sec: 42188.9, 300 sec: 42043.1). Total num frames: 2773401600. Throughput: 0: 10616.0. Samples: 443347248. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:38,977][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:39,400][626795] Updated weights for policy 0, policy_version 338552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:41,343][626795] Updated weights for policy 0, policy_version 338562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:43,494][626795] Updated weights for policy 0, policy_version 338572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:43,975][24592] Fps is (10 sec: 40959.7, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2773590016. Throughput: 0: 10597.4. Samples: 443378016. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:43,976][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:45,592][626795] Updated weights for policy 0, policy_version 338582 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:47,628][626795] Updated weights for policy 0, policy_version 338592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:48,975][24592] Fps is (10 sec: 39321.3, 60 sec: 42052.5, 300 sec: 41931.9). Total num frames: 2773794816. Throughput: 0: 10506.7. Samples: 443436834. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:48,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:49,722][626795] Updated weights for policy 0, policy_version 338602 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:51,665][626795] Updated weights for policy 0, policy_version 338612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:53,566][626795] Updated weights for policy 0, policy_version 338622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:53,975][24592] Fps is (10 sec: 41779.5, 60 sec: 42052.4, 300 sec: 42043.0). Total num frames: 2774007808. Throughput: 0: 10536.2. Samples: 443499552. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:53,977][24592] Avg episode reward: [(0, '4.952')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:55,630][626795] Updated weights for policy 0, policy_version 338632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:57,427][626795] Updated weights for policy 0, policy_version 338642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:58,975][24592] Fps is (10 sec: 41779.9, 60 sec: 41915.8, 300 sec: 42043.1). Total num frames: 2774212608. Throughput: 0: 10539.8. Samples: 443529954. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:05:58,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:05:59,519][626795] Updated weights for policy 0, policy_version 338652 (0.0048)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:01,402][626795] Updated weights for policy 0, policy_version 338662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:03,369][626795] Updated weights for policy 0, policy_version 338672 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:03,976][24592] Fps is (10 sec: 41778.4, 60 sec: 42052.3, 300 sec: 42015.3). Total num frames: 2774425600. Throughput: 0: 10534.4. Samples: 443593320. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:03,977][24592] Avg episode reward: [(0, '4.388')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:05,346][626795] Updated weights for policy 0, policy_version 338682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:07,242][626795] Updated weights for policy 0, policy_version 338692 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:08,975][24592] Fps is (10 sec: 41778.8, 60 sec: 42052.4, 300 sec: 41987.5). Total num frames: 2774630400. Throughput: 0: 10530.7. Samples: 443656482. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:08,976][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:09,279][626795] Updated weights for policy 0, policy_version 338702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:11,217][626795] Updated weights for policy 0, policy_version 338712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:13,126][626795] Updated weights for policy 0, policy_version 338722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:13,976][24592] Fps is (10 sec: 41779.3, 60 sec: 42052.2, 300 sec: 41959.7). Total num frames: 2774843392. Throughput: 0: 10471.7. Samples: 443687322. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:13,977][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:15,081][626795] Updated weights for policy 0, policy_version 338732 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:17,082][626795] Updated weights for policy 0, policy_version 338742 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:18,976][24592] Fps is (10 sec: 41778.5, 60 sec: 42188.8, 300 sec: 41931.9). Total num frames: 2775048192. Throughput: 0: 10363.4. Samples: 443750142. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:18,977][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:19,078][626795] Updated weights for policy 0, policy_version 338752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:20,959][626795] Updated weights for policy 0, policy_version 338762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:22,984][626795] Updated weights for policy 0, policy_version 338772 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:23,975][24592] Fps is (10 sec: 41780.2, 60 sec: 42052.3, 300 sec: 41932.0). Total num frames: 2775261184. Throughput: 0: 10339.7. Samples: 443812536. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:23,976][24592] Avg episode reward: [(0, '4.559')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:25,342][626795] Updated weights for policy 0, policy_version 338782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:27,364][626795] Updated weights for policy 0, policy_version 338792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:28,975][24592] Fps is (10 sec: 40141.4, 60 sec: 41515.1, 300 sec: 41959.7). Total num frames: 2775449600. Throughput: 0: 10256.4. Samples: 443839554. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:28,978][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:29,283][626795] Updated weights for policy 0, policy_version 338802 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:31,344][626795] Updated weights for policy 0, policy_version 338812 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:33,219][626795] Updated weights for policy 0, policy_version 338822 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:33,976][24592] Fps is (10 sec: 39320.9, 60 sec: 41233.0, 300 sec: 41904.2). Total num frames: 2775654400. Throughput: 0: 10337.6. Samples: 443902026. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:33,978][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:35,142][626795] Updated weights for policy 0, policy_version 338832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:37,100][626795] Updated weights for policy 0, policy_version 338842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:38,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41096.5, 300 sec: 41904.2). Total num frames: 2775867392. Throughput: 0: 10360.0. Samples: 443965752. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:38,976][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:39,100][626795] Updated weights for policy 0, policy_version 338852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:41,029][626795] Updated weights for policy 0, policy_version 338862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:42,874][626795] Updated weights for policy 0, policy_version 338872 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:43,977][24592] Fps is (10 sec: 42591.9, 60 sec: 41505.0, 300 sec: 41876.2). Total num frames: 2776080384. Throughput: 0: 10382.1. Samples: 443997168. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:43,978][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:44,761][626795] Updated weights for policy 0, policy_version 338882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:46,712][626795] Updated weights for policy 0, policy_version 338892 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:48,572][626795] Updated weights for policy 0, policy_version 338902 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:48,975][24592] Fps is (10 sec: 43417.8, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2776301568. Throughput: 0: 10414.5. Samples: 444061968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:48,977][24592] Avg episode reward: [(0, '4.697')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:50,536][626795] Updated weights for policy 0, policy_version 338912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:52,467][626795] Updated weights for policy 0, policy_version 338922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:53,975][24592] Fps is (10 sec: 42605.2, 60 sec: 41642.6, 300 sec: 41876.4). Total num frames: 2776506368. Throughput: 0: 10423.8. Samples: 444125556. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:53,977][24592] Avg episode reward: [(0, '4.434')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:54,303][626795] Updated weights for policy 0, policy_version 338932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:56,345][626795] Updated weights for policy 0, policy_version 338942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:06:58,655][626795] Updated weights for policy 0, policy_version 338952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:58,976][24592] Fps is (10 sec: 40139.4, 60 sec: 41505.8, 300 sec: 41931.9). Total num frames: 2776702976. Throughput: 0: 10451.3. Samples: 444157632. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:06:58,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:00,606][626795] Updated weights for policy 0, policy_version 338962 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:02,411][626795] Updated weights for policy 0, policy_version 338972 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:03,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41642.8, 300 sec: 41987.5). Total num frames: 2776924160. Throughput: 0: 10381.1. Samples: 444217290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:03,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000338980_2776924160.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000337754_2766880768.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:04,422][626795] Updated weights for policy 0, policy_version 338982 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:06,250][626795] Updated weights for policy 0, policy_version 338992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:08,114][626795] Updated weights for policy 0, policy_version 339002 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:08,977][24592] Fps is (10 sec: 43413.2, 60 sec: 41778.3, 300 sec: 41931.8). Total num frames: 2777137152. Throughput: 0: 10429.3. Samples: 444281868. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:08,978][24592] Avg episode reward: [(0, '4.520')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:10,184][626795] Updated weights for policy 0, policy_version 339012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:12,025][626795] Updated weights for policy 0, policy_version 339022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:13,820][626795] Updated weights for policy 0, policy_version 339032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:13,976][24592] Fps is (10 sec: 42596.7, 60 sec: 41779.0, 300 sec: 41904.1). Total num frames: 2777350144. Throughput: 0: 10542.8. Samples: 444313986. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:13,977][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:15,811][626795] Updated weights for policy 0, policy_version 339042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:17,805][626795] Updated weights for policy 0, policy_version 339052 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:18,976][24592] Fps is (10 sec: 42603.3, 60 sec: 41915.7, 300 sec: 41904.1). Total num frames: 2777563136. Throughput: 0: 10583.5. Samples: 444378282. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:18,977][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:19,675][626795] Updated weights for policy 0, policy_version 339062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:21,559][626795] Updated weights for policy 0, policy_version 339072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:23,531][626795] Updated weights for policy 0, policy_version 339082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:23,975][24592] Fps is (10 sec: 42600.2, 60 sec: 41915.7, 300 sec: 41904.2). Total num frames: 2777776128. Throughput: 0: 10571.7. Samples: 444441480. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:23,977][24592] Avg episode reward: [(0, '5.006')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:25,631][626795] Updated weights for policy 0, policy_version 339092 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:27,521][626795] Updated weights for policy 0, policy_version 339102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:28,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42325.3, 300 sec: 41904.2). Total num frames: 2777989120. Throughput: 0: 10577.7. Samples: 444473148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:28,977][24592] Avg episode reward: [(0, '4.798')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:29,419][626795] Updated weights for policy 0, policy_version 339112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:31,927][626795] Updated weights for policy 0, policy_version 339122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:33,780][626795] Updated weights for policy 0, policy_version 339132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:33,976][24592] Fps is (10 sec: 40140.1, 60 sec: 42052.2, 300 sec: 41931.9). Total num frames: 2778177536. Throughput: 0: 10424.5. Samples: 444531072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:33,978][24592] Avg episode reward: [(0, '4.723')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:35,747][626795] Updated weights for policy 0, policy_version 339142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:37,579][626795] Updated weights for policy 0, policy_version 339152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:38,975][24592] Fps is (10 sec: 40140.8, 60 sec: 42052.3, 300 sec: 41904.2). Total num frames: 2778390528. Throughput: 0: 10439.6. Samples: 444595338. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:38,981][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:39,577][626795] Updated weights for policy 0, policy_version 339162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:41,398][626795] Updated weights for policy 0, policy_version 339172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:43,346][626795] Updated weights for policy 0, policy_version 339182 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:43,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42053.4, 300 sec: 41876.4). Total num frames: 2778603520. Throughput: 0: 10438.5. Samples: 444627360. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:43,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:45,257][626795] Updated weights for policy 0, policy_version 339192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:47,197][626795] Updated weights for policy 0, policy_version 339202 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:48,976][24592] Fps is (10 sec: 41777.4, 60 sec: 41778.9, 300 sec: 41848.6). Total num frames: 2778808320. Throughput: 0: 10531.1. Samples: 444691194. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:48,979][24592] Avg episode reward: [(0, '4.326')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:49,080][626795] Updated weights for policy 0, policy_version 339212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:51,105][626795] Updated weights for policy 0, policy_version 339222 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:52,993][626795] Updated weights for policy 0, policy_version 339232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.8, 300 sec: 41848.6). Total num frames: 2779021312. Throughput: 0: 10501.6. Samples: 444754428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:53,978][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:54,858][626795] Updated weights for policy 0, policy_version 339242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:57,015][626795] Updated weights for policy 0, policy_version 339252 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:07:58,884][626795] Updated weights for policy 0, policy_version 339262 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:58,975][24592] Fps is (10 sec: 42600.4, 60 sec: 42189.0, 300 sec: 41876.6). Total num frames: 2779234304. Throughput: 0: 10472.6. Samples: 444785250. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:07:58,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:00,739][626795] Updated weights for policy 0, policy_version 339272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:02,702][626795] Updated weights for policy 0, policy_version 339282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:03,975][24592] Fps is (10 sec: 40960.0, 60 sec: 41779.2, 300 sec: 41820.9). Total num frames: 2779430912. Throughput: 0: 10493.6. Samples: 444850494. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:03,976][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:05,089][626795] Updated weights for policy 0, policy_version 339292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:06,897][626795] Updated weights for policy 0, policy_version 339302 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:08,914][626795] Updated weights for policy 0, policy_version 339312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:08,976][24592] Fps is (10 sec: 40958.1, 60 sec: 41779.8, 300 sec: 41931.9). Total num frames: 2779643904. Throughput: 0: 10410.4. Samples: 444909954. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:08,978][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:10,716][626795] Updated weights for policy 0, policy_version 339322 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:12,633][626795] Updated weights for policy 0, policy_version 339332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:13,975][24592] Fps is (10 sec: 43417.7, 60 sec: 41916.0, 300 sec: 41904.3). Total num frames: 2779865088. Throughput: 0: 10433.7. Samples: 444942666. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:13,977][24592] Avg episode reward: [(0, '4.830')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:14,514][626795] Updated weights for policy 0, policy_version 339342 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:16,317][626795] Updated weights for policy 0, policy_version 339352 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:18,146][626795] Updated weights for policy 0, policy_version 339362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:18,976][24592] Fps is (10 sec: 44236.2, 60 sec: 42052.0, 300 sec: 41931.9). Total num frames: 2780086272. Throughput: 0: 10603.1. Samples: 445008216. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:18,977][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:20,137][626795] Updated weights for policy 0, policy_version 339372 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:21,899][626795] Updated weights for policy 0, policy_version 339382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:23,879][626795] Updated weights for policy 0, policy_version 339392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:23,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42052.3, 300 sec: 41932.0). Total num frames: 2780299264. Throughput: 0: 10607.1. Samples: 445072656. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:23,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:25,803][626795] Updated weights for policy 0, policy_version 339402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:27,786][626795] Updated weights for policy 0, policy_version 339412 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:28,975][24592] Fps is (10 sec: 42600.8, 60 sec: 42052.3, 300 sec: 41931.9). Total num frames: 2780512256. Throughput: 0: 10603.5. Samples: 445104516. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:28,977][24592] Avg episode reward: [(0, '4.872')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:29,767][626795] Updated weights for policy 0, policy_version 339422 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:31,697][626795] Updated weights for policy 0, policy_version 339432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:33,477][626795] Updated weights for policy 0, policy_version 339442 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:33,975][24592] Fps is (10 sec: 41779.0, 60 sec: 42325.4, 300 sec: 41904.2). Total num frames: 2780717056. Throughput: 0: 10598.0. Samples: 445168098. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:33,987][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:36,137][626795] Updated weights for policy 0, policy_version 339452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:38,057][626795] Updated weights for policy 0, policy_version 339462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:38,976][24592] Fps is (10 sec: 39320.3, 60 sec: 41915.5, 300 sec: 41931.9). Total num frames: 2780905472. Throughput: 0: 10453.7. Samples: 445224846. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:38,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:40,125][626795] Updated weights for policy 0, policy_version 339472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:41,855][626795] Updated weights for policy 0, policy_version 339482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:43,670][626795] Updated weights for policy 0, policy_version 339492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:43,975][24592] Fps is (10 sec: 40959.9, 60 sec: 42052.2, 300 sec: 41904.2). Total num frames: 2781126656. Throughput: 0: 10486.4. Samples: 445257138. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:43,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:45,709][626795] Updated weights for policy 0, policy_version 339502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:47,648][626795] Updated weights for policy 0, policy_version 339512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:48,976][24592] Fps is (10 sec: 43416.9, 60 sec: 42188.8, 300 sec: 41876.3). Total num frames: 2781339648. Throughput: 0: 10465.6. Samples: 445321452. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:48,978][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:49,584][626795] Updated weights for policy 0, policy_version 339522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:51,448][626795] Updated weights for policy 0, policy_version 339532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:53,299][626795] Updated weights for policy 0, policy_version 339542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:53,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42188.7, 300 sec: 41904.2). Total num frames: 2781552640. Throughput: 0: 10580.9. Samples: 445386090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:53,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:55,282][626795] Updated weights for policy 0, policy_version 339552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:57,276][626795] Updated weights for policy 0, policy_version 339562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:58,976][24592] Fps is (10 sec: 42599.6, 60 sec: 42188.6, 300 sec: 41904.2). Total num frames: 2781765632. Throughput: 0: 10548.2. Samples: 445417338. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:08:58,976][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:08:59,010][626795] Updated weights for policy 0, policy_version 339572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:00,971][626795] Updated weights for policy 0, policy_version 339582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:02,930][626795] Updated weights for policy 0, policy_version 339592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:03,976][24592] Fps is (10 sec: 42598.9, 60 sec: 42461.8, 300 sec: 41904.1). Total num frames: 2781978624. Throughput: 0: 10539.6. Samples: 445482492. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:03,978][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000339597_2781978624.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:04,119][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000338367_2771902464.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:04,948][626795] Updated weights for policy 0, policy_version 339602 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:06,823][626795] Updated weights for policy 0, policy_version 339612 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:08,975][24592] Fps is (10 sec: 40141.7, 60 sec: 42052.6, 300 sec: 41820.9). Total num frames: 2782167040. Throughput: 0: 10445.7. Samples: 445542714. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:08,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:09,277][626795] Updated weights for policy 0, policy_version 339622 (0.0037)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:11,218][626795] Updated weights for policy 0, policy_version 339632 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:13,210][626795] Updated weights for policy 0, policy_version 339642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:13,976][24592] Fps is (10 sec: 40140.2, 60 sec: 41915.6, 300 sec: 41904.1). Total num frames: 2782380032. Throughput: 0: 10362.1. Samples: 445570812. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:13,977][24592] Avg episode reward: [(0, '4.983')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:14,886][626795] Updated weights for policy 0, policy_version 339652 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:16,897][626795] Updated weights for policy 0, policy_version 339662 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:18,925][626795] Updated weights for policy 0, policy_version 339672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:18,975][24592] Fps is (10 sec: 42597.9, 60 sec: 41779.5, 300 sec: 41904.2). Total num frames: 2782593024. Throughput: 0: 10396.4. Samples: 445635936. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:18,978][24592] Avg episode reward: [(0, '4.711')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:20,823][626795] Updated weights for policy 0, policy_version 339682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:22,658][626795] Updated weights for policy 0, policy_version 339692 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:23,978][24592] Fps is (10 sec: 43408.8, 60 sec: 41914.1, 300 sec: 41933.4). Total num frames: 2782814208. Throughput: 0: 10575.8. Samples: 445700778. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:23,980][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:24,645][626795] Updated weights for policy 0, policy_version 339702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:26,501][626795] Updated weights for policy 0, policy_version 339712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:28,373][626795] Updated weights for policy 0, policy_version 339722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:28,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41779.2, 300 sec: 41904.2). Total num frames: 2783019008. Throughput: 0: 10559.7. Samples: 445732326. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:28,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:30,340][626795] Updated weights for policy 0, policy_version 339732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:31,154][626772] Signal inference workers to stop experience collection... (5750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:31,160][626772] Signal inference workers to resume experience collection... (5750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:31,174][626795] InferenceWorker_p0-w0: stopping experience collection (5750 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:31,175][626795] InferenceWorker_p0-w0: resuming experience collection (5750 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:32,234][626795] Updated weights for policy 0, policy_version 339742 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:33,976][24592] Fps is (10 sec: 42607.8, 60 sec: 42052.2, 300 sec: 41931.9). Total num frames: 2783240192. Throughput: 0: 10554.9. Samples: 445796418. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:33,978][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:34,149][626795] Updated weights for policy 0, policy_version 339752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:36,157][626795] Updated weights for policy 0, policy_version 339762 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:37,920][626795] Updated weights for policy 0, policy_version 339772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:38,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42325.5, 300 sec: 41904.2). Total num frames: 2783444992. Throughput: 0: 10543.4. Samples: 445860540. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:38,977][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:39,921][626795] Updated weights for policy 0, policy_version 339782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:42,390][626795] Updated weights for policy 0, policy_version 339792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:43,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41915.7, 300 sec: 41932.0). Total num frames: 2783641600. Throughput: 0: 10447.9. Samples: 445887492. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:43,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:44,375][626795] Updated weights for policy 0, policy_version 339802 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:46,219][626795] Updated weights for policy 0, policy_version 339812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:48,031][626795] Updated weights for policy 0, policy_version 339822 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41916.1, 300 sec: 41932.0). Total num frames: 2783854592. Throughput: 0: 10427.4. Samples: 445951722. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:48,980][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:49,984][626795] Updated weights for policy 0, policy_version 339832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:51,888][626795] Updated weights for policy 0, policy_version 339842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:53,803][626795] Updated weights for policy 0, policy_version 339852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:53,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42052.4, 300 sec: 41959.7). Total num frames: 2784075776. Throughput: 0: 10523.1. Samples: 446016252. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:53,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:55,630][626795] Updated weights for policy 0, policy_version 339862 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:57,596][626795] Updated weights for policy 0, policy_version 339872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:58,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42052.4, 300 sec: 41987.5). Total num frames: 2784288768. Throughput: 0: 10628.7. Samples: 446049102. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:09:58,976][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:09:59,500][626795] Updated weights for policy 0, policy_version 339882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:01,356][626795] Updated weights for policy 0, policy_version 339892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:03,368][626795] Updated weights for policy 0, policy_version 339902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:03,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42052.3, 300 sec: 42015.3). Total num frames: 2784501760. Throughput: 0: 10587.0. Samples: 446112348. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:03,977][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:05,262][626795] Updated weights for policy 0, policy_version 339912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:07,162][626795] Updated weights for policy 0, policy_version 339922 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:08,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42461.9, 300 sec: 42015.2). Total num frames: 2784714752. Throughput: 0: 10579.9. Samples: 446176848. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:08,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:09,145][626795] Updated weights for policy 0, policy_version 339932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:11,126][626795] Updated weights for policy 0, policy_version 339942 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:12,856][626795] Updated weights for policy 0, policy_version 339952 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:14,529][24592] Fps is (10 sec: 40364.6, 60 sec: 42074.0, 300 sec: 41992.0). Total num frames: 2784927744. Throughput: 0: 10450.9. Samples: 446208402. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:14,530][24592] Avg episode reward: [(0, '5.052')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:15,482][626795] Updated weights for policy 0, policy_version 339962 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:17,444][626795] Updated weights for policy 0, policy_version 339972 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:18,976][24592] Fps is (10 sec: 40139.5, 60 sec: 42052.1, 300 sec: 41959.7). Total num frames: 2785116160. Throughput: 0: 10437.3. Samples: 446266098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:18,976][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:19,326][626795] Updated weights for policy 0, policy_version 339982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:21,236][626795] Updated weights for policy 0, policy_version 339992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:22,939][626795] Updated weights for policy 0, policy_version 340002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:23,975][24592] Fps is (10 sec: 42492.3, 60 sec: 41917.3, 300 sec: 41933.8). Total num frames: 2785329152. Throughput: 0: 10441.9. Samples: 446330424. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:23,976][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:25,058][626795] Updated weights for policy 0, policy_version 340012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:26,938][626795] Updated weights for policy 0, policy_version 340022 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:28,807][626795] Updated weights for policy 0, policy_version 340032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:28,975][24592] Fps is (10 sec: 43419.1, 60 sec: 42188.8, 300 sec: 41931.9). Total num frames: 2785550336. Throughput: 0: 10550.0. Samples: 446362242. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:28,976][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:30,762][626795] Updated weights for policy 0, policy_version 340042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:32,628][626795] Updated weights for policy 0, policy_version 340052 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:33,975][24592] Fps is (10 sec: 43417.1, 60 sec: 42052.2, 300 sec: 41904.1). Total num frames: 2785763328. Throughput: 0: 10561.7. Samples: 446427000. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:33,977][24592] Avg episode reward: [(0, '4.393')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:34,633][626795] Updated weights for policy 0, policy_version 340062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:36,433][626795] Updated weights for policy 0, policy_version 340072 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:38,317][626795] Updated weights for policy 0, policy_version 340082 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:38,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42188.8, 300 sec: 41987.5). Total num frames: 2785976320. Throughput: 0: 10559.2. Samples: 446491416. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:38,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:40,397][626795] Updated weights for policy 0, policy_version 340092 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:42,281][626795] Updated weights for policy 0, policy_version 340102 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:43,975][24592] Fps is (10 sec: 42598.9, 60 sec: 42461.9, 300 sec: 42015.2). Total num frames: 2786189312. Throughput: 0: 10530.8. Samples: 446522988. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:43,977][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:44,149][626795] Updated weights for policy 0, policy_version 340112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:46,075][626795] Updated weights for policy 0, policy_version 340122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:48,666][626795] Updated weights for policy 0, policy_version 340132 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:48,976][24592] Fps is (10 sec: 39321.2, 60 sec: 41915.6, 300 sec: 41904.1). Total num frames: 2786369536. Throughput: 0: 10402.2. Samples: 446580450. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:48,977][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:50,678][626795] Updated weights for policy 0, policy_version 340142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:52,412][626795] Updated weights for policy 0, policy_version 340152 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:53,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41779.2, 300 sec: 41931.9). Total num frames: 2786582528. Throughput: 0: 10408.5. Samples: 446645232. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:53,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:54,370][626795] Updated weights for policy 0, policy_version 340162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:56,372][626795] Updated weights for policy 0, policy_version 340172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:10:58,273][626795] Updated weights for policy 0, policy_version 340182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:58,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41779.2, 300 sec: 41932.0). Total num frames: 2786795520. Throughput: 0: 10525.6. Samples: 446676228. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:10:58,977][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:00,175][626795] Updated weights for policy 0, policy_version 340192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:02,010][626795] Updated weights for policy 0, policy_version 340202 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:03,967][626795] Updated weights for policy 0, policy_version 340212 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:03,976][24592] Fps is (10 sec: 43414.7, 60 sec: 41915.3, 300 sec: 41987.4). Total num frames: 2787016704. Throughput: 0: 10552.9. Samples: 446740980. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:03,978][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000340212_2787016704.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:04,043][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000338980_2776924160.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:06,080][626795] Updated weights for policy 0, policy_version 340222 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:07,926][626795] Updated weights for policy 0, policy_version 340232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2787221504. Throughput: 0: 10511.2. Samples: 446803428. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:08,978][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:09,824][626795] Updated weights for policy 0, policy_version 340242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:11,778][626795] Updated weights for policy 0, policy_version 340252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:13,693][626795] Updated weights for policy 0, policy_version 340262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:13,976][24592] Fps is (10 sec: 41780.5, 60 sec: 42167.9, 300 sec: 41987.4). Total num frames: 2787434496. Throughput: 0: 10521.4. Samples: 446835708. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:13,977][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:15,690][626795] Updated weights for policy 0, policy_version 340272 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:17,527][626795] Updated weights for policy 0, policy_version 340282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:18,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42189.0, 300 sec: 41987.5). Total num frames: 2787647488. Throughput: 0: 10491.1. Samples: 446899098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:18,976][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:19,570][626795] Updated weights for policy 0, policy_version 340292 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:22,206][626795] Updated weights for policy 0, policy_version 340302 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:23,975][24592] Fps is (10 sec: 39323.0, 60 sec: 41642.7, 300 sec: 41959.7). Total num frames: 2787827712. Throughput: 0: 10283.9. Samples: 446954190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:23,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:24,234][626795] Updated weights for policy 0, policy_version 340312 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:26,038][626795] Updated weights for policy 0, policy_version 340322 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:27,975][626795] Updated weights for policy 0, policy_version 340332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:28,975][24592] Fps is (10 sec: 38502.3, 60 sec: 41369.6, 300 sec: 41959.7). Total num frames: 2788032512. Throughput: 0: 10294.0. Samples: 446986218. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:28,976][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:29,969][626795] Updated weights for policy 0, policy_version 340342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:31,921][626795] Updated weights for policy 0, policy_version 340352 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:33,812][626795] Updated weights for policy 0, policy_version 340362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.7, 300 sec: 41959.7). Total num frames: 2788245504. Throughput: 0: 10417.6. Samples: 447049242. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:33,980][24592] Avg episode reward: [(0, '4.891')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:35,681][626795] Updated weights for policy 0, policy_version 340372 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:37,590][626795] Updated weights for policy 0, policy_version 340382 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:38,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41506.2, 300 sec: 41987.7). Total num frames: 2788466688. Throughput: 0: 10414.3. Samples: 447113874. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:38,976][24592] Avg episode reward: [(0, '4.684')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:39,646][626795] Updated weights for policy 0, policy_version 340392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:41,459][626795] Updated weights for policy 0, policy_version 340402 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:43,352][626795] Updated weights for policy 0, policy_version 340412 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:43,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41506.2, 300 sec: 41959.7). Total num frames: 2788679680. Throughput: 0: 10437.4. Samples: 447145908. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:43,978][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:45,347][626795] Updated weights for policy 0, policy_version 340422 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:47,307][626795] Updated weights for policy 0, policy_version 340432 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:48,976][24592] Fps is (10 sec: 41778.2, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2788884480. Throughput: 0: 10409.7. Samples: 447209412. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:48,978][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:49,255][626795] Updated weights for policy 0, policy_version 340442 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:51,134][626795] Updated weights for policy 0, policy_version 340452 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:53,623][626795] Updated weights for policy 0, policy_version 340462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:53,975][24592] Fps is (10 sec: 40140.5, 60 sec: 41642.6, 300 sec: 41959.7). Total num frames: 2789081088. Throughput: 0: 10317.1. Samples: 447267696. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:53,989][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:55,505][626795] Updated weights for policy 0, policy_version 340472 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:57,467][626795] Updated weights for policy 0, policy_version 340482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:58,977][24592] Fps is (10 sec: 40135.4, 60 sec: 41505.1, 300 sec: 41903.9). Total num frames: 2789285888. Throughput: 0: 10283.9. Samples: 447298494. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:11:58,979][24592] Avg episode reward: [(0, '4.871')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:11:59,451][626795] Updated weights for policy 0, policy_version 340492 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:01,296][626795] Updated weights for policy 0, policy_version 340502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:03,130][626795] Updated weights for policy 0, policy_version 340512 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:03,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41506.6, 300 sec: 41932.1). Total num frames: 2789507072. Throughput: 0: 10323.6. Samples: 447363660. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:03,978][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:05,098][626795] Updated weights for policy 0, policy_version 340522 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:06,980][626795] Updated weights for policy 0, policy_version 340532 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:08,841][626795] Updated weights for policy 0, policy_version 340542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:08,975][24592] Fps is (10 sec: 43424.1, 60 sec: 41642.6, 300 sec: 41932.0). Total num frames: 2789720064. Throughput: 0: 10540.3. Samples: 447428502. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:08,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:10,806][626795] Updated weights for policy 0, policy_version 340552 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:12,721][626795] Updated weights for policy 0, policy_version 340562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:13,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41642.9, 300 sec: 41932.0). Total num frames: 2789933056. Throughput: 0: 10533.5. Samples: 447460224. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:13,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:14,667][626795] Updated weights for policy 0, policy_version 340572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:16,615][626795] Updated weights for policy 0, policy_version 340582 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:18,416][626795] Updated weights for policy 0, policy_version 340592 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:18,975][24592] Fps is (10 sec: 42598.8, 60 sec: 41642.7, 300 sec: 41931.9). Total num frames: 2790146048. Throughput: 0: 10550.9. Samples: 447524034. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:18,976][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:20,467][626795] Updated weights for policy 0, policy_version 340602 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:22,306][626795] Updated weights for policy 0, policy_version 340612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:23,975][24592] Fps is (10 sec: 43417.7, 60 sec: 42325.4, 300 sec: 41959.7). Total num frames: 2790367232. Throughput: 0: 10546.0. Samples: 447588444. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:23,978][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:24,300][626795] Updated weights for policy 0, policy_version 340622 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:26,733][626795] Updated weights for policy 0, policy_version 340632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:28,532][626795] Updated weights for policy 0, policy_version 340642 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:28,975][24592] Fps is (10 sec: 40140.7, 60 sec: 41915.8, 300 sec: 41932.0). Total num frames: 2790547456. Throughput: 0: 10407.7. Samples: 447614256. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:28,976][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:30,547][626795] Updated weights for policy 0, policy_version 340652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:32,445][626795] Updated weights for policy 0, policy_version 340662 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:33,975][24592] Fps is (10 sec: 40140.5, 60 sec: 42052.2, 300 sec: 41959.7). Total num frames: 2790768640. Throughput: 0: 10432.4. Samples: 447678870. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:33,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:34,444][626795] Updated weights for policy 0, policy_version 340672 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:36,229][626795] Updated weights for policy 0, policy_version 340682 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:38,224][626795] Updated weights for policy 0, policy_version 340692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:38,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2790981632. Throughput: 0: 10561.5. Samples: 447742962. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:38,977][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:40,078][626795] Updated weights for policy 0, policy_version 340702 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:41,926][626795] Updated weights for policy 0, policy_version 340712 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:43,872][626795] Updated weights for policy 0, policy_version 340722 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:43,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2791194624. Throughput: 0: 10598.8. Samples: 447775422. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:43,978][24592] Avg episode reward: [(0, '4.360')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:45,788][626795] Updated weights for policy 0, policy_version 340732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:47,736][626795] Updated weights for policy 0, policy_version 340742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:48,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.4, 300 sec: 41987.5). Total num frames: 2791407616. Throughput: 0: 10586.1. Samples: 447840036. Policy #0 lag: (min: 1.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:48,979][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:49,709][626795] Updated weights for policy 0, policy_version 340752 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:51,588][626795] Updated weights for policy 0, policy_version 340762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:53,423][626795] Updated weights for policy 0, policy_version 340772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:53,977][24592] Fps is (10 sec: 42590.6, 60 sec: 42324.1, 300 sec: 41987.2). Total num frames: 2791620608. Throughput: 0: 10568.8. Samples: 447904116. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:53,979][24592] Avg episode reward: [(0, '4.516')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:55,397][626795] Updated weights for policy 0, policy_version 340782 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:57,330][626795] Updated weights for policy 0, policy_version 340792 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:58,976][24592] Fps is (10 sec: 40140.0, 60 sec: 42053.2, 300 sec: 41959.7). Total num frames: 2791809024. Throughput: 0: 10554.8. Samples: 447935190. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:12:58,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:12:59,813][626795] Updated weights for policy 0, policy_version 340802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:01,616][626795] Updated weights for policy 0, policy_version 340812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:03,472][626795] Updated weights for policy 0, policy_version 340822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:03,975][24592] Fps is (10 sec: 40148.1, 60 sec: 41915.7, 300 sec: 41959.8). Total num frames: 2792022016. Throughput: 0: 10446.8. Samples: 447994140. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:03,977][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:03,999][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000340824_2792030208.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:04,124][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000339597_2781978624.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:05,532][626795] Updated weights for policy 0, policy_version 340832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:07,499][626795] Updated weights for policy 0, policy_version 340842 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:08,975][24592] Fps is (10 sec: 43417.9, 60 sec: 42052.2, 300 sec: 41959.7). Total num frames: 2792243200. Throughput: 0: 10440.9. Samples: 448058286. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:08,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:09,437][626795] Updated weights for policy 0, policy_version 340852 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:11,260][626795] Updated weights for policy 0, policy_version 340862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:13,196][626795] Updated weights for policy 0, policy_version 340872 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:13,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42052.3, 300 sec: 41932.0). Total num frames: 2792456192. Throughput: 0: 10570.4. Samples: 448089924. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:13,976][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:15,151][626795] Updated weights for policy 0, policy_version 340882 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:16,948][626795] Updated weights for policy 0, policy_version 340892 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:18,931][626795] Updated weights for policy 0, policy_version 340902 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:18,975][24592] Fps is (10 sec: 42599.0, 60 sec: 42052.3, 300 sec: 41931.9). Total num frames: 2792669184. Throughput: 0: 10552.3. Samples: 448153722. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:18,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:20,789][626795] Updated weights for policy 0, policy_version 340912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:22,750][626795] Updated weights for policy 0, policy_version 340922 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:23,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 2792882176. Throughput: 0: 10572.9. Samples: 448218744. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:23,978][24592] Avg episode reward: [(0, '4.867')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:24,708][626795] Updated weights for policy 0, policy_version 340932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:26,602][626795] Updated weights for policy 0, policy_version 340942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:28,433][626795] Updated weights for policy 0, policy_version 340952 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:28,975][24592] Fps is (10 sec: 42598.1, 60 sec: 42461.8, 300 sec: 41959.7). Total num frames: 2793095168. Throughput: 0: 10564.4. Samples: 448250820. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:28,980][24592] Avg episode reward: [(0, '4.539')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:30,425][626795] Updated weights for policy 0, policy_version 340962 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:32,845][626795] Updated weights for policy 0, policy_version 340972 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:33,975][24592] Fps is (10 sec: 40141.0, 60 sec: 41915.8, 300 sec: 41959.8). Total num frames: 2793283584. Throughput: 0: 10422.7. Samples: 448309056. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:33,976][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:34,863][626795] Updated weights for policy 0, policy_version 340982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:36,803][626795] Updated weights for policy 0, policy_version 340992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:38,590][626795] Updated weights for policy 0, policy_version 341002 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:38,975][24592] Fps is (10 sec: 40140.9, 60 sec: 41915.7, 300 sec: 41931.9). Total num frames: 2793496576. Throughput: 0: 10420.8. Samples: 448373034. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:38,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:40,506][626795] Updated weights for policy 0, policy_version 341012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:42,479][626795] Updated weights for policy 0, policy_version 341022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:43,976][24592] Fps is (10 sec: 43415.6, 60 sec: 42052.0, 300 sec: 41959.7). Total num frames: 2793717760. Throughput: 0: 10432.6. Samples: 448404660. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:43,977][24592] Avg episode reward: [(0, '4.927')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:44,386][626795] Updated weights for policy 0, policy_version 341032 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:46,225][626795] Updated weights for policy 0, policy_version 341042 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:48,145][626795] Updated weights for policy 0, policy_version 341052 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:48,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42052.2, 300 sec: 41959.7). Total num frames: 2793930752. Throughput: 0: 10567.7. Samples: 448469688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:48,977][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:50,093][626795] Updated weights for policy 0, policy_version 341062 (0.0029)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:51,940][626795] Updated weights for policy 0, policy_version 341072 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:53,889][626795] Updated weights for policy 0, policy_version 341082 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:53,976][24592] Fps is (10 sec: 42595.5, 60 sec: 42052.8, 300 sec: 41959.6). Total num frames: 2794143744. Throughput: 0: 10583.1. Samples: 448534536. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:53,978][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:55,704][626795] Updated weights for policy 0, policy_version 341092 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:57,690][626795] Updated weights for policy 0, policy_version 341102 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:58,975][24592] Fps is (10 sec: 42598.7, 60 sec: 42462.0, 300 sec: 41959.7). Total num frames: 2794356736. Throughput: 0: 10596.3. Samples: 448566756. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:13:58,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:13:59,558][626795] Updated weights for policy 0, policy_version 341112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:01,545][626795] Updated weights for policy 0, policy_version 341122 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:03,263][626795] Updated weights for policy 0, policy_version 341132 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:04,179][24592] Fps is (10 sec: 40949.0, 60 sec: 42182.0, 300 sec: 41986.2). Total num frames: 2794561536. Throughput: 0: 10554.9. Samples: 448630842. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:04,180][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:05,890][626795] Updated weights for policy 0, policy_version 341142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:07,760][626795] Updated weights for policy 0, policy_version 341152 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:08,977][24592] Fps is (10 sec: 40959.7, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 2794766336. Throughput: 0: 10454.9. Samples: 448689216. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:08,978][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:09,706][626795] Updated weights for policy 0, policy_version 341162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:11,628][626795] Updated weights for policy 0, policy_version 341172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:13,524][626795] Updated weights for policy 0, policy_version 341182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:13,975][24592] Fps is (10 sec: 42648.5, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 2794979328. Throughput: 0: 10452.4. Samples: 448721178. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:13,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:15,309][626795] Updated weights for policy 0, policy_version 341192 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:17,191][626795] Updated weights for policy 0, policy_version 341202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:18,975][24592] Fps is (10 sec: 42598.6, 60 sec: 42052.3, 300 sec: 41960.0). Total num frames: 2795192320. Throughput: 0: 10617.1. Samples: 448786824. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:18,976][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:19,236][626795] Updated weights for policy 0, policy_version 341212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:21,133][626795] Updated weights for policy 0, policy_version 341222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:22,972][626795] Updated weights for policy 0, policy_version 341232 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:23,976][24592] Fps is (10 sec: 42596.3, 60 sec: 42051.9, 300 sec: 41987.4). Total num frames: 2795405312. Throughput: 0: 10590.6. Samples: 448849614. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:23,977][24592] Avg episode reward: [(0, '5.066')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:24,966][626795] Updated weights for policy 0, policy_version 341242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:26,856][626795] Updated weights for policy 0, policy_version 341252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:28,820][626795] Updated weights for policy 0, policy_version 341262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:28,975][24592] Fps is (10 sec: 43417.2, 60 sec: 42188.8, 300 sec: 41987.5). Total num frames: 2795626496. Throughput: 0: 10620.9. Samples: 448882596. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:28,977][24592] Avg episode reward: [(0, '4.555')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:30,627][626795] Updated weights for policy 0, policy_version 341272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:32,501][626795] Updated weights for policy 0, policy_version 341282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:33,976][24592] Fps is (10 sec: 43418.2, 60 sec: 42598.1, 300 sec: 42015.2). Total num frames: 2795839488. Throughput: 0: 10613.8. Samples: 448947312. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:33,979][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:34,508][626795] Updated weights for policy 0, policy_version 341292 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:37,033][626795] Updated weights for policy 0, policy_version 341302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:38,931][626795] Updated weights for policy 0, policy_version 341312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:38,976][24592] Fps is (10 sec: 40139.5, 60 sec: 42188.5, 300 sec: 41987.4). Total num frames: 2796027904. Throughput: 0: 10458.8. Samples: 449005176. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:38,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:40,783][626795] Updated weights for policy 0, policy_version 341322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:42,659][626795] Updated weights for policy 0, policy_version 341332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:43,975][24592] Fps is (10 sec: 40961.6, 60 sec: 42189.1, 300 sec: 42015.2). Total num frames: 2796249088. Throughput: 0: 10461.6. Samples: 449037528. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:43,978][24592] Avg episode reward: [(0, '4.541')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:44,605][626795] Updated weights for policy 0, policy_version 341342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:46,448][626795] Updated weights for policy 0, policy_version 341352 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:48,392][626795] Updated weights for policy 0, policy_version 341362 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:48,975][24592] Fps is (10 sec: 42600.1, 60 sec: 42052.3, 300 sec: 41959.7). Total num frames: 2796453888. Throughput: 0: 10509.3. Samples: 449101620. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:48,978][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:50,431][626795] Updated weights for policy 0, policy_version 341372 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:52,155][626795] Updated weights for policy 0, policy_version 341382 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:53,975][24592] Fps is (10 sec: 42598.0, 60 sec: 42189.5, 300 sec: 41987.5). Total num frames: 2796675072. Throughput: 0: 10602.0. Samples: 449166306. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:53,977][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:54,185][626795] Updated weights for policy 0, policy_version 341392 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:56,133][626795] Updated weights for policy 0, policy_version 341402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:57,990][626795] Updated weights for policy 0, policy_version 341412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:58,975][24592] Fps is (10 sec: 43417.0, 60 sec: 42188.7, 300 sec: 41987.5). Total num frames: 2796888064. Throughput: 0: 10596.0. Samples: 449197998. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:14:58,977][24592] Avg episode reward: [(0, '4.919')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:14:59,861][626795] Updated weights for policy 0, policy_version 341422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:01,844][626795] Updated weights for policy 0, policy_version 341432 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:03,705][626795] Updated weights for policy 0, policy_version 341442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:03,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42469.3, 300 sec: 41987.4). Total num frames: 2797101056. Throughput: 0: 10563.5. Samples: 449262186. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:03,978][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000341443_2797101056.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:04,113][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000340212_2787016704.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:05,786][626795] Updated weights for policy 0, policy_version 341452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:07,524][626795] Updated weights for policy 0, policy_version 341462 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:08,975][24592] Fps is (10 sec: 42599.2, 60 sec: 42461.9, 300 sec: 42066.4). Total num frames: 2797314048. Throughput: 0: 10600.5. Samples: 449326632. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:08,976][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:10,056][626795] Updated weights for policy 0, policy_version 341472 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:11,918][626795] Updated weights for policy 0, policy_version 341482 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:13,806][626795] Updated weights for policy 0, policy_version 341492 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:13,975][24592] Fps is (10 sec: 40961.7, 60 sec: 42188.8, 300 sec: 42015.3). Total num frames: 2797510656. Throughput: 0: 10437.2. Samples: 449352270. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:13,977][24592] Avg episode reward: [(0, '4.462')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:15,745][626795] Updated weights for policy 0, policy_version 341502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:17,632][626795] Updated weights for policy 0, policy_version 341512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:18,975][24592] Fps is (10 sec: 40959.4, 60 sec: 42188.7, 300 sec: 42015.2). Total num frames: 2797723648. Throughput: 0: 10456.3. Samples: 449417844. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:18,976][24592] Avg episode reward: [(0, '4.799')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:19,586][626795] Updated weights for policy 0, policy_version 341522 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:21,619][626795] Updated weights for policy 0, policy_version 341532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:23,272][626795] Updated weights for policy 0, policy_version 341542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:23,975][24592] Fps is (10 sec: 41778.8, 60 sec: 42052.5, 300 sec: 41959.7). Total num frames: 2797928448. Throughput: 0: 10590.7. Samples: 449481756. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:23,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:25,387][626795] Updated weights for policy 0, policy_version 341552 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:27,155][626795] Updated weights for policy 0, policy_version 341562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:28,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42052.3, 300 sec: 41987.5). Total num frames: 2798149632. Throughput: 0: 10573.7. Samples: 449513346. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:28,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:29,058][626795] Updated weights for policy 0, policy_version 341572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:31,061][626795] Updated weights for policy 0, policy_version 341582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:32,994][626795] Updated weights for policy 0, policy_version 341592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:33,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42052.4, 300 sec: 41987.5). Total num frames: 2798362624. Throughput: 0: 10602.2. Samples: 449578722. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:33,976][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:34,805][626795] Updated weights for policy 0, policy_version 341602 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:36,715][626795] Updated weights for policy 0, policy_version 341612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:38,651][626795] Updated weights for policy 0, policy_version 341622 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:38,976][24592] Fps is (10 sec: 43414.1, 60 sec: 42598.1, 300 sec: 42015.1). Total num frames: 2798583808. Throughput: 0: 10612.1. Samples: 449643858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:38,977][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:40,492][626795] Updated weights for policy 0, policy_version 341632 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:42,941][626795] Updated weights for policy 0, policy_version 341642 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:43,975][24592] Fps is (10 sec: 40141.2, 60 sec: 41915.7, 300 sec: 42015.3). Total num frames: 2798764032. Throughput: 0: 10623.1. Samples: 449676036. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:43,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:44,959][626795] Updated weights for policy 0, policy_version 341652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:46,909][626795] Updated weights for policy 0, policy_version 341662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:48,806][626795] Updated weights for policy 0, policy_version 341672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:48,975][24592] Fps is (10 sec: 39325.1, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2798977024. Throughput: 0: 10465.3. Samples: 449733120. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:48,977][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:50,772][626795] Updated weights for policy 0, policy_version 341682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:52,546][626795] Updated weights for policy 0, policy_version 341692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:53,975][24592] Fps is (10 sec: 43417.8, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2799198208. Throughput: 0: 10449.5. Samples: 449796858. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:53,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:54,518][626795] Updated weights for policy 0, policy_version 341702 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:56,477][626795] Updated weights for policy 0, policy_version 341712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:15:58,333][626795] Updated weights for policy 0, policy_version 341722 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:58,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42052.4, 300 sec: 42015.3). Total num frames: 2799411200. Throughput: 0: 10599.5. Samples: 449829246. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:15:58,977][24592] Avg episode reward: [(0, '4.957')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:00,202][626795] Updated weights for policy 0, policy_version 341732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:02,149][626795] Updated weights for policy 0, policy_version 341742 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:03,976][24592] Fps is (10 sec: 42596.9, 60 sec: 42052.3, 300 sec: 42043.0). Total num frames: 2799624192. Throughput: 0: 10573.4. Samples: 449893650. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:03,977][24592] Avg episode reward: [(0, '4.735')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:03,989][626795] Updated weights for policy 0, policy_version 341752 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:06,021][626795] Updated weights for policy 0, policy_version 341762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:07,900][626795] Updated weights for policy 0, policy_version 341772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:08,976][24592] Fps is (10 sec: 42597.5, 60 sec: 42052.1, 300 sec: 42043.0). Total num frames: 2799837184. Throughput: 0: 10577.3. Samples: 449957736. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:08,977][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:09,878][626795] Updated weights for policy 0, policy_version 341782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:11,822][626795] Updated weights for policy 0, policy_version 341792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:13,656][626795] Updated weights for policy 0, policy_version 341802 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:13,976][24592] Fps is (10 sec: 42597.5, 60 sec: 42325.0, 300 sec: 42042.9). Total num frames: 2800050176. Throughput: 0: 10573.8. Samples: 449989170. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:13,977][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:16,257][626795] Updated weights for policy 0, policy_version 341812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:18,196][626795] Updated weights for policy 0, policy_version 341822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:18,975][24592] Fps is (10 sec: 39322.5, 60 sec: 41779.3, 300 sec: 42043.0). Total num frames: 2800230400. Throughput: 0: 10400.7. Samples: 450046752. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:18,977][24592] Avg episode reward: [(0, '4.862')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:20,030][626795] Updated weights for policy 0, policy_version 341832 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:21,983][626795] Updated weights for policy 0, policy_version 341842 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:23,888][626795] Updated weights for policy 0, policy_version 341852 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:23,975][24592] Fps is (10 sec: 40142.9, 60 sec: 42052.3, 300 sec: 42098.6). Total num frames: 2800451584. Throughput: 0: 10383.4. Samples: 450111102. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:23,977][24592] Avg episode reward: [(0, '4.943')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:25,805][626795] Updated weights for policy 0, policy_version 341862 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:27,575][626795] Updated weights for policy 0, policy_version 341872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:28,976][24592] Fps is (10 sec: 44235.7, 60 sec: 42052.2, 300 sec: 42126.3). Total num frames: 2800672768. Throughput: 0: 10397.8. Samples: 450143940. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:28,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:29,506][626795] Updated weights for policy 0, policy_version 341882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:31,366][626795] Updated weights for policy 0, policy_version 341892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:33,354][626795] Updated weights for policy 0, policy_version 341902 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:33,976][24592] Fps is (10 sec: 43416.9, 60 sec: 42052.2, 300 sec: 42098.5). Total num frames: 2800885760. Throughput: 0: 10578.6. Samples: 450209160. Policy #0 lag: (min: 0.0, avg: 2.1, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:33,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:35,291][626795] Updated weights for policy 0, policy_version 341912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:37,202][626795] Updated weights for policy 0, policy_version 341922 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:38,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41916.4, 300 sec: 42098.5). Total num frames: 2801098752. Throughput: 0: 10579.9. Samples: 450272952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:38,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:39,108][626795] Updated weights for policy 0, policy_version 341932 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:41,022][626795] Updated weights for policy 0, policy_version 341942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:42,848][626795] Updated weights for policy 0, policy_version 341952 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:43,975][24592] Fps is (10 sec: 42599.1, 60 sec: 42461.9, 300 sec: 42126.3). Total num frames: 2801311744. Throughput: 0: 10563.9. Samples: 450304620. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:43,978][24592] Avg episode reward: [(0, '4.537')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:44,785][626795] Updated weights for policy 0, policy_version 341962 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:45,967][626772] Signal inference workers to stop experience collection... (5800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:45,967][626772] Signal inference workers to resume experience collection... (5800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:45,976][626795] InferenceWorker_p0-w0: stopping experience collection (5800 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:45,979][626795] InferenceWorker_p0-w0: resuming experience collection (5800 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:46,787][626795] Updated weights for policy 0, policy_version 341972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:48,977][24592] Fps is (10 sec: 39313.7, 60 sec: 41914.3, 300 sec: 42070.5). Total num frames: 2801491968. Throughput: 0: 10549.1. Samples: 450368376. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:48,979][24592] Avg episode reward: [(0, '4.385')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:49,492][626795] Updated weights for policy 0, policy_version 341982 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:51,354][626795] Updated weights for policy 0, policy_version 341992 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:53,269][626795] Updated weights for policy 0, policy_version 342002 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:53,976][24592] Fps is (10 sec: 39320.6, 60 sec: 41779.0, 300 sec: 42098.7). Total num frames: 2801704960. Throughput: 0: 10366.7. Samples: 450424236. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:53,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:55,266][626795] Updated weights for policy 0, policy_version 342012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:57,169][626795] Updated weights for policy 0, policy_version 342022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:58,975][24592] Fps is (10 sec: 42606.7, 60 sec: 41779.2, 300 sec: 42070.8). Total num frames: 2801917952. Throughput: 0: 10376.4. Samples: 450456102. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:16:58,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:16:59,043][626795] Updated weights for policy 0, policy_version 342032 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:00,888][626795] Updated weights for policy 0, policy_version 342042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:02,796][626795] Updated weights for policy 0, policy_version 342052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:03,975][24592] Fps is (10 sec: 43418.7, 60 sec: 41916.0, 300 sec: 42098.6). Total num frames: 2802139136. Throughput: 0: 10558.5. Samples: 450521886. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:03,977][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000342058_2802139136.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:04,135][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000340824_2792030208.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:04,786][626795] Updated weights for policy 0, policy_version 342062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:06,667][626795] Updated weights for policy 0, policy_version 342072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:08,566][626795] Updated weights for policy 0, policy_version 342082 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:08,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41915.9, 300 sec: 42098.5). Total num frames: 2802352128. Throughput: 0: 10544.7. Samples: 450585612. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:08,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:10,458][626795] Updated weights for policy 0, policy_version 342092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:12,391][626795] Updated weights for policy 0, policy_version 342102 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:13,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41916.0, 300 sec: 42098.5). Total num frames: 2802565120. Throughput: 0: 10519.2. Samples: 450617304. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:13,976][24592] Avg episode reward: [(0, '4.521')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:14,373][626795] Updated weights for policy 0, policy_version 342112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:16,247][626795] Updated weights for policy 0, policy_version 342122 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:18,202][626795] Updated weights for policy 0, policy_version 342132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:18,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42461.9, 300 sec: 42070.8). Total num frames: 2802778112. Throughput: 0: 10491.0. Samples: 450681252. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:18,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:20,122][626795] Updated weights for policy 0, policy_version 342142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:22,644][626795] Updated weights for policy 0, policy_version 342152 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:23,975][24592] Fps is (10 sec: 39322.1, 60 sec: 41779.2, 300 sec: 42070.8). Total num frames: 2802958336. Throughput: 0: 10346.7. Samples: 450738552. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:23,977][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:24,671][626795] Updated weights for policy 0, policy_version 342162 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:26,535][626795] Updated weights for policy 0, policy_version 342172 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:28,433][626795] Updated weights for policy 0, policy_version 342182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:28,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41642.8, 300 sec: 42043.0). Total num frames: 2803171328. Throughput: 0: 10342.4. Samples: 450770028. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:28,976][24592] Avg episode reward: [(0, '4.294')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:30,436][626795] Updated weights for policy 0, policy_version 342192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:32,372][626795] Updated weights for policy 0, policy_version 342202 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:33,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41779.3, 300 sec: 42070.8). Total num frames: 2803392512. Throughput: 0: 10350.6. Samples: 450834132. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:33,976][24592] Avg episode reward: [(0, '4.901')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:34,238][626795] Updated weights for policy 0, policy_version 342212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:36,115][626795] Updated weights for policy 0, policy_version 342222 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:38,011][626795] Updated weights for policy 0, policy_version 342232 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:38,975][24592] Fps is (10 sec: 42598.5, 60 sec: 41642.7, 300 sec: 42043.0). Total num frames: 2803597312. Throughput: 0: 10541.7. Samples: 450898608. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:38,976][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:39,852][626795] Updated weights for policy 0, policy_version 342242 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:41,830][626795] Updated weights for policy 0, policy_version 342252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:43,672][626795] Updated weights for policy 0, policy_version 342262 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:43,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41779.2, 300 sec: 42070.8). Total num frames: 2803818496. Throughput: 0: 10557.5. Samples: 450931188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:43,976][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:45,539][626795] Updated weights for policy 0, policy_version 342272 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:47,525][626795] Updated weights for policy 0, policy_version 342282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:48,976][24592] Fps is (10 sec: 43413.4, 60 sec: 42326.1, 300 sec: 42070.9). Total num frames: 2804031488. Throughput: 0: 10528.5. Samples: 450995676. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:48,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:49,430][626795] Updated weights for policy 0, policy_version 342292 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:51,404][626795] Updated weights for policy 0, policy_version 342302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:53,271][626795] Updated weights for policy 0, policy_version 342312 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:54,373][24592] Fps is (10 sec: 40182.7, 60 sec: 41911.4, 300 sec: 42069.7). Total num frames: 2804236288. Throughput: 0: 10445.5. Samples: 451059810. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:54,375][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:55,845][626795] Updated weights for policy 0, policy_version 342322 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:57,769][626795] Updated weights for policy 0, policy_version 342332 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:58,975][24592] Fps is (10 sec: 40144.3, 60 sec: 41915.7, 300 sec: 42070.8). Total num frames: 2804432896. Throughput: 0: 10370.1. Samples: 451083960. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:17:58,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:17:59,736][626795] Updated weights for policy 0, policy_version 342342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:01,697][626795] Updated weights for policy 0, policy_version 342352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:03,553][626795] Updated weights for policy 0, policy_version 342362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:03,975][24592] Fps is (10 sec: 42654.8, 60 sec: 41779.2, 300 sec: 42043.0). Total num frames: 2804645888. Throughput: 0: 10374.1. Samples: 451148088. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:03,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:05,586][626795] Updated weights for policy 0, policy_version 342372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:07,475][626795] Updated weights for policy 0, policy_version 342382 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:08,982][24592] Fps is (10 sec: 41753.0, 60 sec: 41638.3, 300 sec: 42014.3). Total num frames: 2804850688. Throughput: 0: 10515.2. Samples: 451211802. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:08,984][24592] Avg episode reward: [(0, '4.532')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:09,336][626795] Updated weights for policy 0, policy_version 342392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:11,359][626795] Updated weights for policy 0, policy_version 342402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:13,150][626795] Updated weights for policy 0, policy_version 342412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:13,976][24592] Fps is (10 sec: 42596.4, 60 sec: 41778.9, 300 sec: 42042.9). Total num frames: 2805071872. Throughput: 0: 10524.4. Samples: 451243632. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:13,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:15,076][626795] Updated weights for policy 0, policy_version 342422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:16,968][626795] Updated weights for policy 0, policy_version 342432 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:18,975][24592] Fps is (10 sec: 42625.5, 60 sec: 41642.7, 300 sec: 42015.2). Total num frames: 2805276672. Throughput: 0: 10535.6. Samples: 451308234. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:18,976][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:18,988][626795] Updated weights for policy 0, policy_version 342442 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:20,831][626795] Updated weights for policy 0, policy_version 342452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:22,730][626795] Updated weights for policy 0, policy_version 342462 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:23,975][24592] Fps is (10 sec: 42600.2, 60 sec: 42325.3, 300 sec: 42043.0). Total num frames: 2805497856. Throughput: 0: 10530.5. Samples: 451372482. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:23,978][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:24,726][626795] Updated weights for policy 0, policy_version 342472 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:27,252][626795] Updated weights for policy 0, policy_version 342482 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:28,975][24592] Fps is (10 sec: 40140.6, 60 sec: 41779.2, 300 sec: 42015.2). Total num frames: 2805678080. Throughput: 0: 10505.7. Samples: 451403946. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:28,976][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:29,301][626795] Updated weights for policy 0, policy_version 342492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:31,096][626795] Updated weights for policy 0, policy_version 342502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:33,064][626795] Updated weights for policy 0, policy_version 342512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:33,975][24592] Fps is (10 sec: 39321.6, 60 sec: 41642.6, 300 sec: 42015.2). Total num frames: 2805891072. Throughput: 0: 10333.9. Samples: 451460694. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:33,976][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:35,023][626795] Updated weights for policy 0, policy_version 342522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:36,868][626795] Updated weights for policy 0, policy_version 342532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:38,713][626795] Updated weights for policy 0, policy_version 342542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:38,982][24592] Fps is (10 sec: 42572.6, 60 sec: 41775.0, 300 sec: 41986.7). Total num frames: 2806104064. Throughput: 0: 10437.7. Samples: 451525422. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:38,983][24592] Avg episode reward: [(0, '4.489')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:40,718][626795] Updated weights for policy 0, policy_version 342552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:42,527][626795] Updated weights for policy 0, policy_version 342562 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:43,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41779.1, 300 sec: 42015.2). Total num frames: 2806325248. Throughput: 0: 10535.3. Samples: 451558050. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:43,977][24592] Avg episode reward: [(0, '4.404')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:44,481][626795] Updated weights for policy 0, policy_version 342572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:46,345][626795] Updated weights for policy 0, policy_version 342582 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:48,229][626795] Updated weights for policy 0, policy_version 342592 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:48,976][24592] Fps is (10 sec: 44261.5, 60 sec: 41916.0, 300 sec: 42043.1). Total num frames: 2806546432. Throughput: 0: 10570.0. Samples: 451623744. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:48,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:50,088][626795] Updated weights for policy 0, policy_version 342602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:51,916][626795] Updated weights for policy 0, policy_version 342612 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:53,886][626795] Updated weights for policy 0, policy_version 342622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:53,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42332.5, 300 sec: 42043.0). Total num frames: 2806759424. Throughput: 0: 10593.5. Samples: 451688442. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:53,977][24592] Avg episode reward: [(0, '4.779')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:55,736][626795] Updated weights for policy 0, policy_version 342632 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:18:57,649][626795] Updated weights for policy 0, policy_version 342642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:58,975][24592] Fps is (10 sec: 43419.7, 60 sec: 42461.9, 300 sec: 42127.7). Total num frames: 2806980608. Throughput: 0: 10596.1. Samples: 451720452. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:18:58,977][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:00,337][626795] Updated weights for policy 0, policy_version 342652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:02,285][626795] Updated weights for policy 0, policy_version 342662 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:03,976][24592] Fps is (10 sec: 40140.3, 60 sec: 41915.6, 300 sec: 42015.2). Total num frames: 2807160832. Throughput: 0: 10409.5. Samples: 451776666. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:03,977][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000342671_2807160832.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:04,131][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000341443_2797101056.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:04,169][626795] Updated weights for policy 0, policy_version 342672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:06,141][626795] Updated weights for policy 0, policy_version 342682 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:07,983][626795] Updated weights for policy 0, policy_version 342692 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:08,975][24592] Fps is (10 sec: 39321.8, 60 sec: 42056.7, 300 sec: 42015.2). Total num frames: 2807373824. Throughput: 0: 10413.8. Samples: 451841100. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:08,977][24592] Avg episode reward: [(0, '5.022')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:09,890][626795] Updated weights for policy 0, policy_version 342702 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:11,777][626795] Updated weights for policy 0, policy_version 342712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:13,557][626795] Updated weights for policy 0, policy_version 342722 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:13,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41916.1, 300 sec: 42015.2). Total num frames: 2807586816. Throughput: 0: 10441.3. Samples: 451873806. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:13,976][24592] Avg episode reward: [(0, '4.502')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:15,599][626795] Updated weights for policy 0, policy_version 342732 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:17,504][626795] Updated weights for policy 0, policy_version 342742 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:18,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42188.8, 300 sec: 42043.1). Total num frames: 2807808000. Throughput: 0: 10625.7. Samples: 451938852. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:18,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:19,318][626795] Updated weights for policy 0, policy_version 342752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:21,210][626795] Updated weights for policy 0, policy_version 342762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:23,025][626795] Updated weights for policy 0, policy_version 342772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:23,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2808020992. Throughput: 0: 10641.3. Samples: 452004216. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:23,977][24592] Avg episode reward: [(0, '4.505')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:24,951][626795] Updated weights for policy 0, policy_version 342782 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:26,837][626795] Updated weights for policy 0, policy_version 342792 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:28,716][626795] Updated weights for policy 0, policy_version 342802 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:28,976][24592] Fps is (10 sec: 43414.8, 60 sec: 42734.5, 300 sec: 42043.0). Total num frames: 2808242176. Throughput: 0: 10622.4. Samples: 452036064. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:28,977][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:30,728][626795] Updated weights for policy 0, policy_version 342812 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:33,408][626795] Updated weights for policy 0, policy_version 342822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:33,978][24592] Fps is (10 sec: 39310.1, 60 sec: 42050.2, 300 sec: 41987.1). Total num frames: 2808414208. Throughput: 0: 10568.6. Samples: 452099358. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:33,980][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:35,375][626795] Updated weights for policy 0, policy_version 342832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:37,272][626795] Updated weights for policy 0, policy_version 342842 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:38,976][24592] Fps is (10 sec: 38503.3, 60 sec: 42056.2, 300 sec: 41959.6). Total num frames: 2808627200. Throughput: 0: 10392.9. Samples: 452156124. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:38,977][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:39,247][626795] Updated weights for policy 0, policy_version 342852 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:41,138][626795] Updated weights for policy 0, policy_version 342862 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:42,900][626795] Updated weights for policy 0, policy_version 342872 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:43,975][24592] Fps is (10 sec: 44249.9, 60 sec: 42188.9, 300 sec: 42043.0). Total num frames: 2808856576. Throughput: 0: 10387.2. Samples: 452187876. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:43,976][24592] Avg episode reward: [(0, '4.484')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:44,789][626795] Updated weights for policy 0, policy_version 342882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:46,627][626795] Updated weights for policy 0, policy_version 342892 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:48,530][626795] Updated weights for policy 0, policy_version 342902 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:48,975][24592] Fps is (10 sec: 44238.8, 60 sec: 42052.6, 300 sec: 42015.3). Total num frames: 2809069568. Throughput: 0: 10620.1. Samples: 452254566. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:48,977][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:50,542][626795] Updated weights for policy 0, policy_version 342912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:52,329][626795] Updated weights for policy 0, policy_version 342922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 42015.3). Total num frames: 2809282560. Throughput: 0: 10622.3. Samples: 452319102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:53,976][24592] Avg episode reward: [(0, '4.394')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:54,292][626795] Updated weights for policy 0, policy_version 342932 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:56,201][626795] Updated weights for policy 0, policy_version 342942 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:19:58,209][626795] Updated weights for policy 0, policy_version 342952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:58,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41915.7, 300 sec: 42015.3). Total num frames: 2809495552. Throughput: 0: 10597.1. Samples: 452350674. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:19:58,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:00,086][626795] Updated weights for policy 0, policy_version 342962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:01,970][626795] Updated weights for policy 0, policy_version 342972 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:03,862][626795] Updated weights for policy 0, policy_version 342982 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:03,976][24592] Fps is (10 sec: 43413.8, 60 sec: 42597.9, 300 sec: 42042.9). Total num frames: 2809716736. Throughput: 0: 10565.9. Samples: 452414328. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:03,977][24592] Avg episode reward: [(0, '4.826')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:06,566][626795] Updated weights for policy 0, policy_version 342992 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:08,383][626795] Updated weights for policy 0, policy_version 343002 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:08,976][24592] Fps is (10 sec: 40139.3, 60 sec: 42052.0, 300 sec: 41987.4). Total num frames: 2809896960. Throughput: 0: 10375.5. Samples: 452471118. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:08,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:10,393][626795] Updated weights for policy 0, policy_version 343012 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:12,399][626795] Updated weights for policy 0, policy_version 343022 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:13,975][24592] Fps is (10 sec: 38505.8, 60 sec: 41915.7, 300 sec: 41959.7). Total num frames: 2810101760. Throughput: 0: 10350.4. Samples: 452501826. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:13,976][24592] Avg episode reward: [(0, '4.252')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:14,379][626795] Updated weights for policy 0, policy_version 343032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:16,000][626795] Updated weights for policy 0, policy_version 343042 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:17,816][626795] Updated weights for policy 0, policy_version 343052 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:18,975][24592] Fps is (10 sec: 42599.9, 60 sec: 41915.8, 300 sec: 42015.3). Total num frames: 2810322944. Throughput: 0: 10436.0. Samples: 452568948. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:18,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:19,791][626795] Updated weights for policy 0, policy_version 343062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:21,752][626795] Updated weights for policy 0, policy_version 343072 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:23,499][626795] Updated weights for policy 0, policy_version 343082 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:23,975][24592] Fps is (10 sec: 43417.5, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2810535936. Throughput: 0: 10608.8. Samples: 452633514. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:23,977][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:25,418][626795] Updated weights for policy 0, policy_version 343092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:27,428][626795] Updated weights for policy 0, policy_version 343102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:28,976][24592] Fps is (10 sec: 43416.1, 60 sec: 41916.0, 300 sec: 42015.2). Total num frames: 2810757120. Throughput: 0: 10607.3. Samples: 452665206. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:28,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:29,322][626795] Updated weights for policy 0, policy_version 343112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:31,171][626795] Updated weights for policy 0, policy_version 343122 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:33,111][626795] Updated weights for policy 0, policy_version 343132 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:33,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42600.5, 300 sec: 41987.6). Total num frames: 2810970112. Throughput: 0: 10574.4. Samples: 452730414. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:33,976][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:34,916][626795] Updated weights for policy 0, policy_version 343142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:36,948][626795] Updated weights for policy 0, policy_version 343152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:39,303][24592] Fps is (10 sec: 39662.3, 60 sec: 42095.8, 300 sec: 41996.4). Total num frames: 2811166720. Throughput: 0: 9772.9. Samples: 452762082. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:39,304][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:39,726][626795] Updated weights for policy 0, policy_version 343162 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:41,551][626795] Updated weights for policy 0, policy_version 343172 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:43,465][626795] Updated weights for policy 0, policy_version 343182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:43,976][24592] Fps is (10 sec: 39318.6, 60 sec: 41778.6, 300 sec: 41987.4). Total num frames: 2811363328. Throughput: 0: 10387.1. Samples: 452818104. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:43,978][24592] Avg episode reward: [(0, '4.728')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:45,558][626795] Updated weights for policy 0, policy_version 343192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:47,158][626795] Updated weights for policy 0, policy_version 343202 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:48,977][24592] Fps is (10 sec: 42342.9, 60 sec: 41778.5, 300 sec: 41959.6). Total num frames: 2811576320. Throughput: 0: 10391.3. Samples: 452881938. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:48,978][24592] Avg episode reward: [(0, '4.686')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:49,182][626795] Updated weights for policy 0, policy_version 343212 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:51,026][626795] Updated weights for policy 0, policy_version 343222 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:52,969][626795] Updated weights for policy 0, policy_version 343232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:53,975][24592] Fps is (10 sec: 42602.0, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2811789312. Throughput: 0: 10564.6. Samples: 452946522. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:53,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:54,857][626795] Updated weights for policy 0, policy_version 343242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:56,874][626795] Updated weights for policy 0, policy_version 343252 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:20:58,658][626795] Updated weights for policy 0, policy_version 343262 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:58,975][24592] Fps is (10 sec: 43421.3, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2812010496. Throughput: 0: 10598.4. Samples: 452978754. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:20:58,977][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:00,726][626795] Updated weights for policy 0, policy_version 343272 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:02,500][626795] Updated weights for policy 0, policy_version 343282 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:03,975][24592] Fps is (10 sec: 43416.9, 60 sec: 41779.7, 300 sec: 41987.5). Total num frames: 2812223488. Throughput: 0: 10549.8. Samples: 453043692. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:03,977][24592] Avg episode reward: [(0, '4.681')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000343289_2812223488.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:04,139][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000342058_2802139136.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:04,488][626795] Updated weights for policy 0, policy_version 343292 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:06,492][626795] Updated weights for policy 0, policy_version 343302 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:08,342][626795] Updated weights for policy 0, policy_version 343312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:08,975][24592] Fps is (10 sec: 42598.2, 60 sec: 42325.5, 300 sec: 41987.5). Total num frames: 2812436480. Throughput: 0: 10525.0. Samples: 453107142. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:08,978][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:10,279][626795] Updated weights for policy 0, policy_version 343322 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:12,885][626795] Updated weights for policy 0, policy_version 343332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:13,975][24592] Fps is (10 sec: 39322.1, 60 sec: 41915.7, 300 sec: 41987.5). Total num frames: 2812616704. Throughput: 0: 10528.5. Samples: 453138984. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:13,976][24592] Avg episode reward: [(0, '4.614')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:14,811][626795] Updated weights for policy 0, policy_version 343342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:16,702][626795] Updated weights for policy 0, policy_version 343352 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:18,675][626795] Updated weights for policy 0, policy_version 343362 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:18,975][24592] Fps is (10 sec: 39322.0, 60 sec: 41779.2, 300 sec: 41959.7). Total num frames: 2812829696. Throughput: 0: 10327.3. Samples: 453195144. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:18,977][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:20,502][626795] Updated weights for policy 0, policy_version 343372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:22,280][626795] Updated weights for policy 0, policy_version 343382 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:23,976][24592] Fps is (10 sec: 43414.1, 60 sec: 41915.2, 300 sec: 41959.6). Total num frames: 2813050880. Throughput: 0: 11206.8. Samples: 453262728. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:23,978][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:24,141][626795] Updated weights for policy 0, policy_version 343392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:26,041][626795] Updated weights for policy 0, policy_version 343402 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:27,885][626795] Updated weights for policy 0, policy_version 343412 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:28,976][24592] Fps is (10 sec: 44235.2, 60 sec: 41915.7, 300 sec: 41987.4). Total num frames: 2813272064. Throughput: 0: 10602.5. Samples: 453295212. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:28,977][24592] Avg episode reward: [(0, '4.900')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:29,770][626795] Updated weights for policy 0, policy_version 343422 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:31,703][626795] Updated weights for policy 0, policy_version 343432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:33,567][626795] Updated weights for policy 0, policy_version 343442 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:33,975][24592] Fps is (10 sec: 44240.3, 60 sec: 42052.3, 300 sec: 42015.2). Total num frames: 2813493248. Throughput: 0: 10622.5. Samples: 453359940. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:33,977][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:35,541][626795] Updated weights for policy 0, policy_version 343452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:37,315][626795] Updated weights for policy 0, policy_version 343462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:38,975][24592] Fps is (10 sec: 43419.0, 60 sec: 42557.6, 300 sec: 42015.2). Total num frames: 2813706240. Throughput: 0: 10638.4. Samples: 453425250. Policy #0 lag: (min: 0.0, avg: 2.0, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:38,977][24592] Avg episode reward: [(0, '4.517')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:39,262][626795] Updated weights for policy 0, policy_version 343472 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:41,198][626795] Updated weights for policy 0, policy_version 343482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:43,060][626795] Updated weights for policy 0, policy_version 343492 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42599.0, 300 sec: 42126.6). Total num frames: 2813919232. Throughput: 0: 10626.8. Samples: 453456960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:43,976][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:45,824][626795] Updated weights for policy 0, policy_version 343502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:47,755][626795] Updated weights for policy 0, policy_version 343512 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:48,975][24592] Fps is (10 sec: 38502.5, 60 sec: 41916.4, 300 sec: 41987.5). Total num frames: 2814091264. Throughput: 0: 10413.8. Samples: 453512310. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:48,976][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:49,761][626795] Updated weights for policy 0, policy_version 343522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:51,596][626795] Updated weights for policy 0, policy_version 343532 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:53,416][626795] Updated weights for policy 0, policy_version 343542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:53,976][24592] Fps is (10 sec: 39320.1, 60 sec: 42052.0, 300 sec: 42015.2). Total num frames: 2814312448. Throughput: 0: 10448.2. Samples: 453577314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:53,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:55,318][626795] Updated weights for policy 0, policy_version 343552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:57,072][626795] Updated weights for policy 0, policy_version 343562 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:21:58,975][626795] Updated weights for policy 0, policy_version 343572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:58,975][24592] Fps is (10 sec: 45055.4, 60 sec: 42188.7, 300 sec: 42043.0). Total num frames: 2814541824. Throughput: 0: 10485.0. Samples: 453610812. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:21:58,976][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:00,949][626795] Updated weights for policy 0, policy_version 343582 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:02,654][626795] Updated weights for policy 0, policy_version 343592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:03,978][24592] Fps is (10 sec: 44226.4, 60 sec: 42187.0, 300 sec: 42042.6). Total num frames: 2814754816. Throughput: 0: 10687.4. Samples: 453676104. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:03,979][24592] Avg episode reward: [(0, '4.975')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:04,648][626795] Updated weights for policy 0, policy_version 343602 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:06,551][626795] Updated weights for policy 0, policy_version 343612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:08,435][626795] Updated weights for policy 0, policy_version 343622 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:08,978][24592] Fps is (10 sec: 42588.7, 60 sec: 42187.2, 300 sec: 42042.7). Total num frames: 2814967808. Throughput: 0: 10636.0. Samples: 453741366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:08,979][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:10,370][626795] Updated weights for policy 0, policy_version 343632 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:12,271][626795] Updated weights for policy 0, policy_version 343642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:13,975][24592] Fps is (10 sec: 43429.2, 60 sec: 42871.4, 300 sec: 42070.8). Total num frames: 2815188992. Throughput: 0: 10625.4. Samples: 453773352. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:13,977][24592] Avg episode reward: [(0, '4.632')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:14,133][626795] Updated weights for policy 0, policy_version 343652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:16,155][626795] Updated weights for policy 0, policy_version 343662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:18,788][626795] Updated weights for policy 0, policy_version 343672 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:18,975][24592] Fps is (10 sec: 39330.8, 60 sec: 42188.7, 300 sec: 42043.0). Total num frames: 2815361024. Throughput: 0: 10611.0. Samples: 453837438. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:18,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:20,757][626795] Updated weights for policy 0, policy_version 343682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:22,621][626795] Updated weights for policy 0, policy_version 343692 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:23,976][24592] Fps is (10 sec: 39319.3, 60 sec: 42188.9, 300 sec: 42070.7). Total num frames: 2815582208. Throughput: 0: 10383.9. Samples: 453892530. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:23,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:24,614][626795] Updated weights for policy 0, policy_version 343702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:26,506][626795] Updated weights for policy 0, policy_version 343712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:28,428][626795] Updated weights for policy 0, policy_version 343722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:28,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42052.5, 300 sec: 42043.0). Total num frames: 2815795200. Throughput: 0: 10392.8. Samples: 453924636. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:28,976][24592] Avg episode reward: [(0, '4.396')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:30,323][626795] Updated weights for policy 0, policy_version 343732 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:32,164][626795] Updated weights for policy 0, policy_version 343742 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:33,976][24592] Fps is (10 sec: 42600.2, 60 sec: 41915.6, 300 sec: 42070.7). Total num frames: 2816008192. Throughput: 0: 10599.2. Samples: 453989274. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:33,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:34,124][626795] Updated weights for policy 0, policy_version 343752 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:36,086][626795] Updated weights for policy 0, policy_version 343762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:37,885][626795] Updated weights for policy 0, policy_version 343772 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:38,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41915.7, 300 sec: 42043.0). Total num frames: 2816221184. Throughput: 0: 10579.8. Samples: 454053402. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:38,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:39,829][626795] Updated weights for policy 0, policy_version 343782 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:41,739][626795] Updated weights for policy 0, policy_version 343792 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:43,641][626795] Updated weights for policy 0, policy_version 343802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:43,975][24592] Fps is (10 sec: 42599.3, 60 sec: 41915.7, 300 sec: 42043.1). Total num frames: 2816434176. Throughput: 0: 10559.4. Samples: 454085982. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:43,977][24592] Avg episode reward: [(0, '4.912')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:45,471][626795] Updated weights for policy 0, policy_version 343812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:47,480][626795] Updated weights for policy 0, policy_version 343822 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:48,976][24592] Fps is (10 sec: 42597.2, 60 sec: 42598.1, 300 sec: 42127.5). Total num frames: 2816647168. Throughput: 0: 10538.9. Samples: 454150332. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:48,977][24592] Avg episode reward: [(0, '4.409')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:49,364][626795] Updated weights for policy 0, policy_version 343832 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:51,863][626795] Updated weights for policy 0, policy_version 343842 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:53,669][626795] Updated weights for policy 0, policy_version 343852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:53,984][24592] Fps is (10 sec: 40926.2, 60 sec: 42183.2, 300 sec: 42069.6). Total num frames: 2816843776. Throughput: 0: 10390.8. Samples: 454209012. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:53,985][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:55,715][626795] Updated weights for policy 0, policy_version 343862 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:57,586][626795] Updated weights for policy 0, policy_version 343872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:58,975][24592] Fps is (10 sec: 40961.7, 60 sec: 41915.9, 300 sec: 42070.8). Total num frames: 2817056768. Throughput: 0: 10380.5. Samples: 454240476. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:22:58,978][24592] Avg episode reward: [(0, '4.669')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:22:59,638][626795] Updated weights for policy 0, policy_version 343882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:01,416][626795] Updated weights for policy 0, policy_version 343892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:03,259][626795] Updated weights for policy 0, policy_version 343902 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:03,975][24592] Fps is (10 sec: 42633.6, 60 sec: 41917.6, 300 sec: 42099.4). Total num frames: 2817269760. Throughput: 0: 10390.8. Samples: 454305024. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:03,977][24592] Avg episode reward: [(0, '4.425')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000343905_2817269760.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:04,134][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000342671_2807160832.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:05,330][626795] Updated weights for policy 0, policy_version 343912 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:07,141][626795] Updated weights for policy 0, policy_version 343922 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:08,976][24592] Fps is (10 sec: 42597.0, 60 sec: 41917.2, 300 sec: 42070.8). Total num frames: 2817482752. Throughput: 0: 10592.9. Samples: 454369206. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:08,977][24592] Avg episode reward: [(0, '4.420')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:09,039][626795] Updated weights for policy 0, policy_version 343932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:10,919][626795] Updated weights for policy 0, policy_version 343942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:12,872][626795] Updated weights for policy 0, policy_version 343952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:13,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41779.2, 300 sec: 42098.5). Total num frames: 2817695744. Throughput: 0: 10587.6. Samples: 454401078. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:13,977][24592] Avg episode reward: [(0, '4.625')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:14,848][626795] Updated weights for policy 0, policy_version 343962 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:16,664][626795] Updated weights for policy 0, policy_version 343972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:18,547][626795] Updated weights for policy 0, policy_version 343982 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:18,975][24592] Fps is (10 sec: 42599.7, 60 sec: 42461.9, 300 sec: 42070.8). Total num frames: 2817908736. Throughput: 0: 10582.2. Samples: 454465470. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:18,976][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:20,570][626795] Updated weights for policy 0, policy_version 343992 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:22,415][626795] Updated weights for policy 0, policy_version 344002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:24,104][24592] Fps is (10 sec: 40441.1, 60 sec: 41962.9, 300 sec: 42108.0). Total num frames: 2818105344. Throughput: 0: 9839.4. Samples: 454497438. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:24,105][24592] Avg episode reward: [(0, '4.673')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:24,992][626795] Updated weights for policy 0, policy_version 344012 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:26,908][626795] Updated weights for policy 0, policy_version 344022 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:28,782][626795] Updated weights for policy 0, policy_version 344032 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:28,976][24592] Fps is (10 sec: 40956.8, 60 sec: 42051.8, 300 sec: 42126.2). Total num frames: 2818318336. Throughput: 0: 10421.7. Samples: 454554966. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:28,977][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:30,698][626795] Updated weights for policy 0, policy_version 344042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:32,570][626795] Updated weights for policy 0, policy_version 344052 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:33,975][24592] Fps is (10 sec: 43152.2, 60 sec: 42052.4, 300 sec: 42127.2). Total num frames: 2818531328. Throughput: 0: 10436.9. Samples: 454619988. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:33,976][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:34,593][626795] Updated weights for policy 0, policy_version 344062 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:36,295][626795] Updated weights for policy 0, policy_version 344072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:36,910][626772] Signal inference workers to stop experience collection... (5850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:36,914][626772] Signal inference workers to resume experience collection... (5850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:36,926][626795] InferenceWorker_p0-w0: stopping experience collection (5850 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:36,927][626795] InferenceWorker_p0-w0: resuming experience collection (5850 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:38,326][626795] Updated weights for policy 0, policy_version 344082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:38,976][24592] Fps is (10 sec: 42599.2, 60 sec: 42051.9, 300 sec: 42098.5). Total num frames: 2818744320. Throughput: 0: 10559.9. Samples: 454684128. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:38,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:40,204][626795] Updated weights for policy 0, policy_version 344092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:42,099][626795] Updated weights for policy 0, policy_version 344102 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:43,976][24592] Fps is (10 sec: 42596.2, 60 sec: 42051.9, 300 sec: 42070.8). Total num frames: 2818957312. Throughput: 0: 10579.3. Samples: 454716552. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:43,977][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:43,981][626795] Updated weights for policy 0, policy_version 344112 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:45,905][626795] Updated weights for policy 0, policy_version 344122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:47,722][626795] Updated weights for policy 0, policy_version 344132 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:48,976][24592] Fps is (10 sec: 43418.2, 60 sec: 42188.7, 300 sec: 42098.5). Total num frames: 2819178496. Throughput: 0: 10591.4. Samples: 454781640. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:48,977][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:49,759][626795] Updated weights for policy 0, policy_version 344142 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:51,523][626795] Updated weights for policy 0, policy_version 344152 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:53,434][626795] Updated weights for policy 0, policy_version 344162 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:53,976][24592] Fps is (10 sec: 43418.7, 60 sec: 42467.5, 300 sec: 42070.7). Total num frames: 2819391488. Throughput: 0: 10612.4. Samples: 454846764. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:53,977][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:55,396][626795] Updated weights for policy 0, policy_version 344172 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:57,962][626795] Updated weights for policy 0, policy_version 344182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:58,975][24592] Fps is (10 sec: 40142.8, 60 sec: 42052.3, 300 sec: 42098.6). Total num frames: 2819579904. Throughput: 0: 10549.6. Samples: 454875810. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:23:58,976][24592] Avg episode reward: [(0, '4.739')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:23:59,888][626795] Updated weights for policy 0, policy_version 344192 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:01,739][626795] Updated weights for policy 0, policy_version 344202 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:03,644][626795] Updated weights for policy 0, policy_version 344212 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:03,983][24592] Fps is (10 sec: 40111.2, 60 sec: 42046.9, 300 sec: 42097.5). Total num frames: 2819792896. Throughput: 0: 10464.5. Samples: 454936452. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:03,984][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:05,565][626795] Updated weights for policy 0, policy_version 344222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:07,419][626795] Updated weights for policy 0, policy_version 344232 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:08,975][24592] Fps is (10 sec: 43417.6, 60 sec: 42189.0, 300 sec: 42126.3). Total num frames: 2820014080. Throughput: 0: 11234.6. Samples: 455001552. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:08,977][24592] Avg episode reward: [(0, '4.449')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:09,316][626795] Updated weights for policy 0, policy_version 344242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:11,142][626795] Updated weights for policy 0, policy_version 344252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:13,028][626795] Updated weights for policy 0, policy_version 344262 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:13,975][24592] Fps is (10 sec: 43451.0, 60 sec: 42188.9, 300 sec: 42098.6). Total num frames: 2820227072. Throughput: 0: 10633.5. Samples: 455033466. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:13,977][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:14,952][626795] Updated weights for policy 0, policy_version 344272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:16,815][626795] Updated weights for policy 0, policy_version 344282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:18,744][626795] Updated weights for policy 0, policy_version 344292 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:18,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42325.3, 300 sec: 42126.3). Total num frames: 2820448256. Throughput: 0: 10635.1. Samples: 455098566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:18,977][24592] Avg episode reward: [(0, '4.870')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:20,637][626795] Updated weights for policy 0, policy_version 344302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:22,556][626795] Updated weights for policy 0, policy_version 344312 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:23,975][24592] Fps is (10 sec: 43417.3, 60 sec: 42689.7, 300 sec: 42098.6). Total num frames: 2820661248. Throughput: 0: 10653.1. Samples: 455163510. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:23,976][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:24,548][626795] Updated weights for policy 0, policy_version 344322 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:26,394][626795] Updated weights for policy 0, policy_version 344332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:28,327][626795] Updated weights for policy 0, policy_version 344342 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:28,975][24592] Fps is (10 sec: 42598.5, 60 sec: 42599.0, 300 sec: 42237.8). Total num frames: 2820874240. Throughput: 0: 10637.2. Samples: 455195220. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:28,976][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:30,912][626795] Updated weights for policy 0, policy_version 344352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:32,737][626795] Updated weights for policy 0, policy_version 344362 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:33,975][24592] Fps is (10 sec: 40140.4, 60 sec: 42188.7, 300 sec: 42154.1). Total num frames: 2821062656. Throughput: 0: 10467.1. Samples: 455252658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:33,977][24592] Avg episode reward: [(0, '4.833')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:34,685][626795] Updated weights for policy 0, policy_version 344372 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:36,644][626795] Updated weights for policy 0, policy_version 344382 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:38,371][626795] Updated weights for policy 0, policy_version 344392 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:38,975][24592] Fps is (10 sec: 40140.7, 60 sec: 42189.2, 300 sec: 42098.6). Total num frames: 2821275648. Throughput: 0: 10470.7. Samples: 455317944. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:38,976][24592] Avg episode reward: [(0, '4.904')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:40,383][626795] Updated weights for policy 0, policy_version 344402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:42,274][626795] Updated weights for policy 0, policy_version 344412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:43,975][24592] Fps is (10 sec: 43418.1, 60 sec: 42325.7, 300 sec: 42126.3). Total num frames: 2821496832. Throughput: 0: 10541.2. Samples: 455350164. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:43,977][24592] Avg episode reward: [(0, '4.824')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:44,063][626795] Updated weights for policy 0, policy_version 344422 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:46,049][626795] Updated weights for policy 0, policy_version 344432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:47,899][626795] Updated weights for policy 0, policy_version 344442 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:48,976][24592] Fps is (10 sec: 43415.2, 60 sec: 42188.7, 300 sec: 42126.2). Total num frames: 2821709824. Throughput: 0: 10643.1. Samples: 455415318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:48,978][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:49,884][626795] Updated weights for policy 0, policy_version 344452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:51,717][626795] Updated weights for policy 0, policy_version 344462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:53,561][626795] Updated weights for policy 0, policy_version 344472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:53,975][24592] Fps is (10 sec: 42598.4, 60 sec: 42189.0, 300 sec: 42126.3). Total num frames: 2821922816. Throughput: 0: 10633.9. Samples: 455480076. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:53,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:55,514][626795] Updated weights for policy 0, policy_version 344482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:57,423][626795] Updated weights for policy 0, policy_version 344492 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:58,976][24592] Fps is (10 sec: 43418.5, 60 sec: 42734.7, 300 sec: 42126.4). Total num frames: 2822144000. Throughput: 0: 10639.0. Samples: 455512224. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:24:58,977][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:24:59,406][626795] Updated weights for policy 0, policy_version 344502 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:01,116][626795] Updated weights for policy 0, policy_version 344512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:03,715][626795] Updated weights for policy 0, policy_version 344522 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:03,975][24592] Fps is (10 sec: 40959.5, 60 sec: 42330.6, 300 sec: 42154.1). Total num frames: 2822332416. Throughput: 0: 10552.2. Samples: 455573418. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:03,976][24592] Avg episode reward: [(0, '4.656')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:03,979][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000344523_2822332416.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:04,148][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000343289_2812223488.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:05,632][626795] Updated weights for policy 0, policy_version 344532 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:07,618][626795] Updated weights for policy 0, policy_version 344542 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:08,975][24592] Fps is (10 sec: 40142.3, 60 sec: 42188.8, 300 sec: 42181.9). Total num frames: 2822545408. Throughput: 0: 10475.2. Samples: 455634894. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:08,977][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:09,440][626795] Updated weights for policy 0, policy_version 344552 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:11,304][626795] Updated weights for policy 0, policy_version 344562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:13,226][626795] Updated weights for policy 0, policy_version 344572 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:13,975][24592] Fps is (10 sec: 43418.3, 60 sec: 42325.3, 300 sec: 42181.9). Total num frames: 2822766592. Throughput: 0: 10484.8. Samples: 455667036. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:13,976][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:15,151][626795] Updated weights for policy 0, policy_version 344582 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:17,010][626795] Updated weights for policy 0, policy_version 344592 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:18,856][626795] Updated weights for policy 0, policy_version 344602 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:18,978][24592] Fps is (10 sec: 43406.8, 60 sec: 42187.1, 300 sec: 42181.5). Total num frames: 2822979584. Throughput: 0: 10634.7. Samples: 455731242. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:18,979][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:20,867][626795] Updated weights for policy 0, policy_version 344612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:22,754][626795] Updated weights for policy 0, policy_version 344622 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:23,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42188.8, 300 sec: 42154.1). Total num frames: 2823192576. Throughput: 0: 10636.1. Samples: 455796570. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:23,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:24,635][626795] Updated weights for policy 0, policy_version 344632 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:26,500][626795] Updated weights for policy 0, policy_version 344642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:28,403][626795] Updated weights for policy 0, policy_version 344652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:28,977][24592] Fps is (10 sec: 43423.5, 60 sec: 42324.5, 300 sec: 42181.7). Total num frames: 2823413760. Throughput: 0: 10642.9. Samples: 455829108. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:28,978][24592] Avg episode reward: [(0, '4.778')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:30,331][626795] Updated weights for policy 0, policy_version 344662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:32,200][626795] Updated weights for policy 0, policy_version 344672 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:33,975][24592] Fps is (10 sec: 43417.4, 60 sec: 42735.0, 300 sec: 42284.3). Total num frames: 2823626752. Throughput: 0: 10631.9. Samples: 455893746. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:33,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:34,216][626795] Updated weights for policy 0, policy_version 344682 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:36,815][626795] Updated weights for policy 0, policy_version 344692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:38,622][626795] Updated weights for policy 0, policy_version 344702 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:38,975][24592] Fps is (10 sec: 39325.8, 60 sec: 42188.8, 300 sec: 42182.0). Total num frames: 2823806976. Throughput: 0: 10468.8. Samples: 455951172. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:38,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:40,479][626795] Updated weights for policy 0, policy_version 344712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:42,530][626795] Updated weights for policy 0, policy_version 344722 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:43,976][24592] Fps is (10 sec: 40139.7, 60 sec: 42188.6, 300 sec: 42209.7). Total num frames: 2824028160. Throughput: 0: 10448.5. Samples: 455982408. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:43,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:44,343][626795] Updated weights for policy 0, policy_version 344732 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:46,161][626795] Updated weights for policy 0, policy_version 344742 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:48,049][626795] Updated weights for policy 0, policy_version 344752 (0.0032)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:48,975][24592] Fps is (10 sec: 43418.0, 60 sec: 42189.2, 300 sec: 42209.6). Total num frames: 2824241152. Throughput: 0: 10540.8. Samples: 456047754. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:48,976][24592] Avg episode reward: [(0, '4.853')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:50,055][626795] Updated weights for policy 0, policy_version 344762 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:52,002][626795] Updated weights for policy 0, policy_version 344772 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:53,847][626795] Updated weights for policy 0, policy_version 344782 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:53,976][24592] Fps is (10 sec: 42597.8, 60 sec: 42188.5, 300 sec: 42181.8). Total num frames: 2824454144. Throughput: 0: 10602.3. Samples: 456112002. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:53,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:55,787][626795] Updated weights for policy 0, policy_version 344792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:57,738][626795] Updated weights for policy 0, policy_version 344802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:58,976][24592] Fps is (10 sec: 42596.8, 60 sec: 42052.3, 300 sec: 42181.8). Total num frames: 2824667136. Throughput: 0: 10601.1. Samples: 456144090. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:25:58,978][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:25:59,743][626795] Updated weights for policy 0, policy_version 344812 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:01,587][626795] Updated weights for policy 0, policy_version 344822 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:03,403][626795] Updated weights for policy 0, policy_version 344832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:03,975][24592] Fps is (10 sec: 42599.8, 60 sec: 42461.9, 300 sec: 42181.9). Total num frames: 2824880128. Throughput: 0: 10584.4. Samples: 456207516. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:03,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:05,423][626795] Updated weights for policy 0, policy_version 344842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:07,272][626795] Updated weights for policy 0, policy_version 344852 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:08,975][24592] Fps is (10 sec: 40142.0, 60 sec: 42052.2, 300 sec: 42209.6). Total num frames: 2825068544. Throughput: 0: 10444.8. Samples: 456266586. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:08,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:09,899][626795] Updated weights for policy 0, policy_version 344862 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:11,836][626795] Updated weights for policy 0, policy_version 344872 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:13,649][626795] Updated weights for policy 0, policy_version 344882 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:13,976][24592] Fps is (10 sec: 40139.9, 60 sec: 41915.5, 300 sec: 42209.6). Total num frames: 2825281536. Throughput: 0: 10401.1. Samples: 456297150. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:13,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:15,542][626795] Updated weights for policy 0, policy_version 344892 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:17,498][626795] Updated weights for policy 0, policy_version 344902 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:18,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41917.5, 300 sec: 42182.0). Total num frames: 2825494528. Throughput: 0: 10402.5. Samples: 456361860. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:18,977][24592] Avg episode reward: [(0, '4.784')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:19,411][626795] Updated weights for policy 0, policy_version 344912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:21,249][626795] Updated weights for policy 0, policy_version 344922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:23,197][626795] Updated weights for policy 0, policy_version 344932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:23,975][24592] Fps is (10 sec: 42599.9, 60 sec: 41915.7, 300 sec: 42154.1). Total num frames: 2825707520. Throughput: 0: 10543.3. Samples: 456425622. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:23,991][24592] Avg episode reward: [(0, '4.432')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:25,186][626795] Updated weights for policy 0, policy_version 344942 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:27,126][626795] Updated weights for policy 0, policy_version 344952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:28,896][626795] Updated weights for policy 0, policy_version 344962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:28,975][24592] Fps is (10 sec: 43417.0, 60 sec: 41916.4, 300 sec: 42154.1). Total num frames: 2825928704. Throughput: 0: 10563.2. Samples: 456457752. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:28,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:30,766][626795] Updated weights for policy 0, policy_version 344972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:32,678][626795] Updated weights for policy 0, policy_version 344982 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:33,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41915.8, 300 sec: 42154.1). Total num frames: 2826141696. Throughput: 0: 10573.6. Samples: 456523566. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:33,976][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:34,602][626795] Updated weights for policy 0, policy_version 344992 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:36,439][626795] Updated weights for policy 0, policy_version 345002 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:38,349][626795] Updated weights for policy 0, policy_version 345012 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:38,975][24592] Fps is (10 sec: 42598.8, 60 sec: 42461.9, 300 sec: 42154.1). Total num frames: 2826354688. Throughput: 0: 10589.8. Samples: 456588540. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:38,976][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:40,346][626795] Updated weights for policy 0, policy_version 345022 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:42,857][626795] Updated weights for policy 0, policy_version 345032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:43,975][24592] Fps is (10 sec: 40959.8, 60 sec: 42052.5, 300 sec: 42237.4). Total num frames: 2826551296. Throughput: 0: 10441.0. Samples: 456613932. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:43,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:44,798][626795] Updated weights for policy 0, policy_version 345042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:46,672][626795] Updated weights for policy 0, policy_version 345052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:48,574][626795] Updated weights for policy 0, policy_version 345062 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:48,975][24592] Fps is (10 sec: 40960.2, 60 sec: 42052.2, 300 sec: 42209.7). Total num frames: 2826764288. Throughput: 0: 10446.6. Samples: 456677610. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:48,976][24592] Avg episode reward: [(0, '4.966')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:50,529][626795] Updated weights for policy 0, policy_version 345072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:52,292][626795] Updated weights for policy 0, policy_version 345082 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:53,977][24592] Fps is (10 sec: 42591.1, 60 sec: 42051.3, 300 sec: 42153.9). Total num frames: 2826977280. Throughput: 0: 10559.1. Samples: 456741762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:53,981][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:54,397][626795] Updated weights for policy 0, policy_version 345092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:56,205][626795] Updated weights for policy 0, policy_version 345102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:26:58,108][626795] Updated weights for policy 0, policy_version 345112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 42052.5, 300 sec: 42154.5). Total num frames: 2827190272. Throughput: 0: 10590.5. Samples: 456773718. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:26:58,977][24592] Avg episode reward: [(0, '4.777')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:00,108][626795] Updated weights for policy 0, policy_version 345122 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:01,952][626795] Updated weights for policy 0, policy_version 345132 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:03,824][626795] Updated weights for policy 0, policy_version 345142 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:03,975][24592] Fps is (10 sec: 42606.1, 60 sec: 42052.4, 300 sec: 42154.4). Total num frames: 2827403264. Throughput: 0: 10589.5. Samples: 456838386. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:03,976][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000345143_2827411456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:04,126][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000343905_2817269760.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:05,745][626795] Updated weights for policy 0, policy_version 345152 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:07,640][626795] Updated weights for policy 0, policy_version 345162 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:08,975][24592] Fps is (10 sec: 43417.5, 60 sec: 42598.4, 300 sec: 42154.1). Total num frames: 2827624448. Throughput: 0: 10624.5. Samples: 456903726. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:08,976][24592] Avg episode reward: [(0, '4.618')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:09,547][626795] Updated weights for policy 0, policy_version 345172 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:11,327][626795] Updated weights for policy 0, policy_version 345182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:13,759][626795] Updated weights for policy 0, policy_version 345192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:14,633][24592] Fps is (10 sec: 38431.4, 60 sec: 41731.4, 300 sec: 42115.7). Total num frames: 2827812864. Throughput: 0: 10469.9. Samples: 456935784. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:14,634][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:16,488][626795] Updated weights for policy 0, policy_version 345202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:18,337][626795] Updated weights for policy 0, policy_version 345212 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:18,975][24592] Fps is (10 sec: 37683.2, 60 sec: 41779.2, 300 sec: 42098.6). Total num frames: 2828001280. Throughput: 0: 10298.0. Samples: 456986976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:18,976][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:20,396][626795] Updated weights for policy 0, policy_version 345222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:22,338][626795] Updated weights for policy 0, policy_version 345232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:23,975][24592] Fps is (10 sec: 42090.4, 60 sec: 41642.6, 300 sec: 42070.8). Total num frames: 2828206080. Throughput: 0: 10267.8. Samples: 457050594. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:23,976][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:24,161][626795] Updated weights for policy 0, policy_version 345242 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:26,208][626795] Updated weights for policy 0, policy_version 345252 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:27,997][626795] Updated weights for policy 0, policy_version 345262 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:28,976][24592] Fps is (10 sec: 41778.5, 60 sec: 41506.1, 300 sec: 42070.8). Total num frames: 2828419072. Throughput: 0: 10402.2. Samples: 457082034. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:28,977][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:30,011][626795] Updated weights for policy 0, policy_version 345272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:31,839][626795] Updated weights for policy 0, policy_version 345282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:33,826][626795] Updated weights for policy 0, policy_version 345292 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:33,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41642.6, 300 sec: 42098.6). Total num frames: 2828640256. Throughput: 0: 10399.1. Samples: 457145568. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:33,977][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:35,960][626795] Updated weights for policy 0, policy_version 345302 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:38,281][626795] Updated weights for policy 0, policy_version 345312 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:38,975][24592] Fps is (10 sec: 40141.4, 60 sec: 41096.5, 300 sec: 41987.5). Total num frames: 2828820480. Throughput: 0: 10263.7. Samples: 457203612. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:38,976][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:40,484][626795] Updated weights for policy 0, policy_version 345322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:42,898][626795] Updated weights for policy 0, policy_version 345332 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:43,976][24592] Fps is (10 sec: 35223.7, 60 sec: 40686.6, 300 sec: 41848.6). Total num frames: 2828992512. Throughput: 0: 10164.1. Samples: 457231110. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:43,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:45,245][626795] Updated weights for policy 0, policy_version 345342 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:47,997][626795] Updated weights for policy 0, policy_version 345352 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:48,975][24592] Fps is (10 sec: 33587.3, 60 sec: 39867.7, 300 sec: 41738.7). Total num frames: 2829156352. Throughput: 0: 9774.1. Samples: 457278222. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:48,976][24592] Avg episode reward: [(0, '4.550')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:49,938][626795] Updated weights for policy 0, policy_version 345362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:51,938][626795] Updated weights for policy 0, policy_version 345372 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:53,976][24592] Fps is (10 sec: 36865.3, 60 sec: 39732.2, 300 sec: 41709.7). Total num frames: 2829361152. Throughput: 0: 9683.4. Samples: 457339482. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:53,977][24592] Avg episode reward: [(0, '4.925')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:53,987][626795] Updated weights for policy 0, policy_version 345382 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:56,217][626795] Updated weights for policy 0, policy_version 345392 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:27:58,223][626795] Updated weights for policy 0, policy_version 345402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:58,975][24592] Fps is (10 sec: 40140.4, 60 sec: 39458.1, 300 sec: 41654.2). Total num frames: 2829557760. Throughput: 0: 9748.4. Samples: 457368048. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:27:58,976][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:00,247][626795] Updated weights for policy 0, policy_version 345412 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:02,298][626795] Updated weights for policy 0, policy_version 345422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:03,975][24592] Fps is (10 sec: 40141.6, 60 sec: 39321.6, 300 sec: 41626.5). Total num frames: 2829762560. Throughput: 0: 9822.0. Samples: 457428966. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:03,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:04,355][626795] Updated weights for policy 0, policy_version 345432 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:06,381][626795] Updated weights for policy 0, policy_version 345442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:08,346][626795] Updated weights for policy 0, policy_version 345452 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:08,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39048.5, 300 sec: 41598.7). Total num frames: 2829967360. Throughput: 0: 9756.7. Samples: 457489644. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:08,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:10,428][626795] Updated weights for policy 0, policy_version 345462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:12,351][626795] Updated weights for policy 0, policy_version 345472 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:13,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39757.6, 300 sec: 41570.9). Total num frames: 2830172160. Throughput: 0: 9723.0. Samples: 457519566. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:13,976][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:14,394][626795] Updated weights for policy 0, policy_version 345482 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:16,279][626795] Updated weights for policy 0, policy_version 345492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:18,437][626795] Updated weights for policy 0, policy_version 345502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:18,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39594.6, 300 sec: 41616.8). Total num frames: 2830376960. Throughput: 0: 9684.0. Samples: 457581348. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:18,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:21,016][626795] Updated weights for policy 0, policy_version 345512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:23,014][626795] Updated weights for policy 0, policy_version 345522 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:23,976][24592] Fps is (10 sec: 37682.2, 60 sec: 39048.4, 300 sec: 41459.9). Total num frames: 2830548992. Throughput: 0: 9607.0. Samples: 457635930. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:23,977][24592] Avg episode reward: [(0, '4.905')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:25,069][626795] Updated weights for policy 0, policy_version 345532 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:27,048][626795] Updated weights for policy 0, policy_version 345542 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:28,961][626795] Updated weights for policy 0, policy_version 345552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:28,976][24592] Fps is (10 sec: 38501.6, 60 sec: 39048.5, 300 sec: 41459.8). Total num frames: 2830761984. Throughput: 0: 9679.8. Samples: 457666698. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:28,976][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:30,915][626795] Updated weights for policy 0, policy_version 345562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:32,834][626795] Updated weights for policy 0, policy_version 345572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:33,977][24592] Fps is (10 sec: 41774.5, 60 sec: 38774.6, 300 sec: 41432.0). Total num frames: 2830966784. Throughput: 0: 10037.0. Samples: 457729902. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:33,979][24592] Avg episode reward: [(0, '5.043')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:35,062][626795] Updated weights for policy 0, policy_version 345582 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:36,851][626795] Updated weights for policy 0, policy_version 345592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:38,911][626795] Updated weights for policy 0, policy_version 345602 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:38,975][24592] Fps is (10 sec: 40961.0, 60 sec: 39185.1, 300 sec: 41404.4). Total num frames: 2831171584. Throughput: 0: 10030.0. Samples: 457790832. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:38,979][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:41,067][626795] Updated weights for policy 0, policy_version 345612 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:43,145][626795] Updated weights for policy 0, policy_version 345622 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:43,975][24592] Fps is (10 sec: 40146.2, 60 sec: 39595.0, 300 sec: 41321.1). Total num frames: 2831368192. Throughput: 0: 10029.1. Samples: 457819356. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:43,977][24592] Avg episode reward: [(0, '4.587')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:45,028][626795] Updated weights for policy 0, policy_version 345632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:46,973][626795] Updated weights for policy 0, policy_version 345642 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:48,976][24592] Fps is (10 sec: 40140.9, 60 sec: 40277.3, 300 sec: 41293.3). Total num frames: 2831572992. Throughput: 0: 10059.7. Samples: 457881654. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:48,978][24592] Avg episode reward: [(0, '4.850')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:49,119][626795] Updated weights for policy 0, policy_version 345652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:51,059][626795] Updated weights for policy 0, policy_version 345662 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:53,878][626795] Updated weights for policy 0, policy_version 345672 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:53,975][24592] Fps is (10 sec: 37683.5, 60 sec: 39731.4, 300 sec: 41237.7). Total num frames: 2831745024. Throughput: 0: 9877.5. Samples: 457934130. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:53,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:55,956][626795] Updated weights for policy 0, policy_version 345682 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:57,940][626795] Updated weights for policy 0, policy_version 345692 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:58,975][24592] Fps is (10 sec: 37683.0, 60 sec: 39867.8, 300 sec: 41211.0). Total num frames: 2831949824. Throughput: 0: 9884.7. Samples: 457964376. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:28:58,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:28:59,968][626795] Updated weights for policy 0, policy_version 345702 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:01,944][626795] Updated weights for policy 0, policy_version 345712 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:03,985][24592] Fps is (10 sec: 40101.2, 60 sec: 39724.7, 300 sec: 41125.2). Total num frames: 2832146432. Throughput: 0: 9875.3. Samples: 458025834. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:03,987][24592] Avg episode reward: [(0, '4.815')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:04,050][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000345722_2832154624.pth...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:04,055][626795] Updated weights for policy 0, policy_version 345722 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:04,126][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000344523_2822332416.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:06,269][626795] Updated weights for policy 0, policy_version 345732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:08,113][626795] Updated weights for policy 0, policy_version 345742 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:08,975][24592] Fps is (10 sec: 40140.5, 60 sec: 39731.2, 300 sec: 41098.8). Total num frames: 2832351232. Throughput: 0: 10000.2. Samples: 458085936. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:08,977][24592] Avg episode reward: [(0, '4.533')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:10,092][626795] Updated weights for policy 0, policy_version 345752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:12,079][626795] Updated weights for policy 0, policy_version 345762 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:13,975][24592] Fps is (10 sec: 41000.5, 60 sec: 39731.2, 300 sec: 41043.3). Total num frames: 2832556032. Throughput: 0: 10003.8. Samples: 458116866. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:13,977][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:14,084][626795] Updated weights for policy 0, policy_version 345772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:16,020][626795] Updated weights for policy 0, policy_version 345782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:18,013][626795] Updated weights for policy 0, policy_version 345792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:18,975][24592] Fps is (10 sec: 40960.4, 60 sec: 39731.2, 300 sec: 41015.5). Total num frames: 2832760832. Throughput: 0: 9979.5. Samples: 458178966. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:18,977][24592] Avg episode reward: [(0, '4.500')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:19,993][626795] Updated weights for policy 0, policy_version 345802 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:22,222][626795] Updated weights for policy 0, policy_version 345812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:23,975][24592] Fps is (10 sec: 40140.4, 60 sec: 40140.9, 300 sec: 40960.0). Total num frames: 2832957440. Throughput: 0: 9942.7. Samples: 458238252. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:23,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:24,266][626795] Updated weights for policy 0, policy_version 345822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:26,944][626795] Updated weights for policy 0, policy_version 345832 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:28,887][626795] Updated weights for policy 0, policy_version 345842 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:28,975][24592] Fps is (10 sec: 37683.3, 60 sec: 39594.8, 300 sec: 40932.3). Total num frames: 2833137664. Throughput: 0: 9829.9. Samples: 458261700. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:28,977][24592] Avg episode reward: [(0, '4.883')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:30,957][626795] Updated weights for policy 0, policy_version 345852 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:32,941][626795] Updated weights for policy 0, policy_version 345862 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:33,976][24592] Fps is (10 sec: 38502.1, 60 sec: 39595.5, 300 sec: 40904.4). Total num frames: 2833342464. Throughput: 0: 9821.7. Samples: 458323632. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:33,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:34,884][626795] Updated weights for policy 0, policy_version 345872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:36,909][626795] Updated weights for policy 0, policy_version 345882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:38,840][626795] Updated weights for policy 0, policy_version 345892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:38,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39594.7, 300 sec: 40848.9). Total num frames: 2833547264. Throughput: 0: 10013.3. Samples: 458384730. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:38,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:41,539][626795] Updated weights for policy 0, policy_version 345902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:43,807][626795] Updated weights for policy 0, policy_version 345912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:43,976][24592] Fps is (10 sec: 37683.5, 60 sec: 39185.0, 300 sec: 40710.1). Total num frames: 2833719296. Throughput: 0: 9886.5. Samples: 458409270. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:43,977][24592] Avg episode reward: [(0, '4.403')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:45,778][626795] Updated weights for policy 0, policy_version 345922 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:48,004][626795] Updated weights for policy 0, policy_version 345932 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:48,975][24592] Fps is (10 sec: 36864.1, 60 sec: 39048.5, 300 sec: 40654.5). Total num frames: 2833915904. Throughput: 0: 9780.1. Samples: 458465844. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:48,976][24592] Avg episode reward: [(0, '4.349')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:50,013][626795] Updated weights for policy 0, policy_version 345942 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:52,036][626795] Updated weights for policy 0, policy_version 345952 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:53,892][626795] Updated weights for policy 0, policy_version 345962 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:53,977][24592] Fps is (10 sec: 40132.8, 60 sec: 39593.3, 300 sec: 40598.8). Total num frames: 2834120704. Throughput: 0: 9821.3. Samples: 458527914. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:53,979][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:56,056][626795] Updated weights for policy 0, policy_version 345972 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:29:57,983][626795] Updated weights for policy 0, policy_version 345982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:59,456][24592] Fps is (10 sec: 37520.2, 60 sec: 39009.5, 300 sec: 40533.1). Total num frames: 2834309120. Throughput: 0: 9692.7. Samples: 458557692. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:29:59,459][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:00,834][626795] Updated weights for policy 0, policy_version 345992 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:02,711][626795] Updated weights for policy 0, policy_version 346002 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:03,975][24592] Fps is (10 sec: 37690.7, 60 sec: 39191.4, 300 sec: 40515.7). Total num frames: 2834497536. Throughput: 0: 9624.9. Samples: 458612088. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:03,977][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:04,783][626795] Updated weights for policy 0, policy_version 346012 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:06,608][626795] Updated weights for policy 0, policy_version 346022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:08,600][626795] Updated weights for policy 0, policy_version 346032 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:08,975][24592] Fps is (10 sec: 42165.1, 60 sec: 39321.7, 300 sec: 40487.9). Total num frames: 2834710528. Throughput: 0: 9706.8. Samples: 458675058. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:08,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:10,752][626795] Updated weights for policy 0, policy_version 346042 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:12,736][626772] Signal inference workers to stop experience collection... (5900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:12,736][626772] Signal inference workers to resume experience collection... (5900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:12,761][626795] InferenceWorker_p0-w0: stopping experience collection (5900 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:12,761][626795] InferenceWorker_p0-w0: resuming experience collection (5900 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:13,187][626795] Updated weights for policy 0, policy_version 346052 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:13,975][24592] Fps is (10 sec: 38502.6, 60 sec: 38775.4, 300 sec: 40349.4). Total num frames: 2834882560. Throughput: 0: 9826.7. Samples: 458703900. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:13,976][24592] Avg episode reward: [(0, '5.020')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:15,533][626795] Updated weights for policy 0, policy_version 346062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:17,485][626795] Updated weights for policy 0, policy_version 346072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:18,976][24592] Fps is (10 sec: 36043.8, 60 sec: 38502.3, 300 sec: 40265.7). Total num frames: 2835070976. Throughput: 0: 9634.7. Samples: 458757192. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:18,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:19,691][626795] Updated weights for policy 0, policy_version 346082 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:21,748][626795] Updated weights for policy 0, policy_version 346092 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:23,832][626795] Updated weights for policy 0, policy_version 346102 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:23,975][24592] Fps is (10 sec: 38502.6, 60 sec: 38502.5, 300 sec: 40182.6). Total num frames: 2835267584. Throughput: 0: 9586.0. Samples: 458816100. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:23,977][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:25,735][626795] Updated weights for policy 0, policy_version 346112 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:27,733][626795] Updated weights for policy 0, policy_version 346122 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:28,975][24592] Fps is (10 sec: 40141.6, 60 sec: 38912.0, 300 sec: 40154.7). Total num frames: 2835472384. Throughput: 0: 9737.1. Samples: 458847438. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:28,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:30,193][626795] Updated weights for policy 0, policy_version 346132 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:33,210][626795] Updated weights for policy 0, policy_version 346142 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:33,975][24592] Fps is (10 sec: 35225.5, 60 sec: 37956.4, 300 sec: 40043.6). Total num frames: 2835619840. Throughput: 0: 9498.5. Samples: 458893278. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:33,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:35,417][626795] Updated weights for policy 0, policy_version 346152 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:37,415][626795] Updated weights for policy 0, policy_version 346162 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:38,975][24592] Fps is (10 sec: 34406.6, 60 sec: 37819.7, 300 sec: 39960.3). Total num frames: 2835816448. Throughput: 0: 9408.0. Samples: 458951256. Policy #0 lag: (min: 0.0, avg: 1.8, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:38,976][24592] Avg episode reward: [(0, '4.536')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:39,688][626795] Updated weights for policy 0, policy_version 346172 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:41,693][626795] Updated weights for policy 0, policy_version 346182 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:43,749][626795] Updated weights for policy 0, policy_version 346192 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:43,978][24592] Fps is (10 sec: 39311.1, 60 sec: 38227.7, 300 sec: 39904.4). Total num frames: 2836013056. Throughput: 0: 9525.7. Samples: 458981802. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:43,979][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:45,858][626795] Updated weights for policy 0, policy_version 346202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:47,876][626795] Updated weights for policy 0, policy_version 346212 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:48,975][24592] Fps is (10 sec: 39321.1, 60 sec: 38229.2, 300 sec: 39849.3). Total num frames: 2836209664. Throughput: 0: 9540.4. Samples: 459041406. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:48,977][24592] Avg episode reward: [(0, '4.791')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:50,053][626795] Updated weights for policy 0, policy_version 346222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:52,641][626795] Updated weights for policy 0, policy_version 346232 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:53,975][24592] Fps is (10 sec: 36874.0, 60 sec: 37684.5, 300 sec: 39710.4). Total num frames: 2836381696. Throughput: 0: 9312.7. Samples: 459094128. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:53,979][24592] Avg episode reward: [(0, '4.852')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:54,921][626795] Updated weights for policy 0, policy_version 346242 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:56,925][626795] Updated weights for policy 0, policy_version 346252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:58,975][24592] Fps is (10 sec: 36045.2, 60 sec: 37987.1, 300 sec: 39627.1). Total num frames: 2836570112. Throughput: 0: 9288.7. Samples: 459121890. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:30:58,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:30:59,168][626795] Updated weights for policy 0, policy_version 346262 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:01,079][626795] Updated weights for policy 0, policy_version 346272 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:03,144][626795] Updated weights for policy 0, policy_version 346282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:03,975][24592] Fps is (10 sec: 39321.4, 60 sec: 37956.3, 300 sec: 39682.6). Total num frames: 2836774912. Throughput: 0: 9433.1. Samples: 459181680. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:03,976][24592] Avg episode reward: [(0, '4.698')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000346286_2836774912.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:04,219][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000345143_2827411456.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:06,350][626795] Updated weights for policy 0, policy_version 346292 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:08,982][24592] Fps is (10 sec: 32747.6, 60 sec: 36450.6, 300 sec: 39376.4). Total num frames: 2836897792. Throughput: 0: 9071.1. Samples: 459224358. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:08,984][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:09,177][626795] Updated weights for policy 0, policy_version 346302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:11,445][626795] Updated weights for policy 0, policy_version 346312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:13,512][626795] Updated weights for policy 0, policy_version 346322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:13,975][24592] Fps is (10 sec: 30310.6, 60 sec: 36591.0, 300 sec: 39266.1). Total num frames: 2837078016. Throughput: 0: 8938.8. Samples: 459249684. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:13,977][24592] Avg episode reward: [(0, '4.747')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:15,646][626795] Updated weights for policy 0, policy_version 346332 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:17,592][626795] Updated weights for policy 0, policy_version 346342 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:18,976][24592] Fps is (10 sec: 38523.6, 60 sec: 36863.7, 300 sec: 39238.2). Total num frames: 2837282816. Throughput: 0: 9258.0. Samples: 459309894. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:18,977][24592] Avg episode reward: [(0, '4.352')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:19,884][626795] Updated weights for policy 0, policy_version 346352 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:22,069][626795] Updated weights for policy 0, policy_version 346362 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:23,918][626795] Updated weights for policy 0, policy_version 346372 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:23,975][24592] Fps is (10 sec: 40140.5, 60 sec: 36863.9, 300 sec: 39155.0). Total num frames: 2837479424. Throughput: 0: 9250.4. Samples: 459367524. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:23,977][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:25,955][626795] Updated weights for policy 0, policy_version 346382 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:27,949][626795] Updated weights for policy 0, policy_version 346392 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:28,975][24592] Fps is (10 sec: 40143.6, 60 sec: 36864.0, 300 sec: 39127.2). Total num frames: 2837684224. Throughput: 0: 9236.9. Samples: 459397440. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:28,977][24592] Avg episode reward: [(0, '4.498')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:29,957][626795] Updated weights for policy 0, policy_version 346402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:31,975][626795] Updated weights for policy 0, policy_version 346412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:33,827][626795] Updated weights for policy 0, policy_version 346422 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 37956.2, 300 sec: 39127.2). Total num frames: 2837897216. Throughput: 0: 9307.3. Samples: 459460236. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:33,977][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:35,839][626795] Updated weights for policy 0, policy_version 346432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:38,504][626795] Updated weights for policy 0, policy_version 346442 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:38,975][24592] Fps is (10 sec: 37683.3, 60 sec: 37410.1, 300 sec: 39016.1). Total num frames: 2838061056. Throughput: 0: 9350.4. Samples: 459514896. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:38,976][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:40,793][626795] Updated weights for policy 0, policy_version 346452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:42,663][626795] Updated weights for policy 0, policy_version 346462 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:43,978][24592] Fps is (10 sec: 36854.4, 60 sec: 37546.7, 300 sec: 38988.0). Total num frames: 2838265856. Throughput: 0: 9387.8. Samples: 459544368. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:43,979][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:44,787][626795] Updated weights for policy 0, policy_version 346472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:46,750][626795] Updated weights for policy 0, policy_version 346482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:48,662][626795] Updated weights for policy 0, policy_version 346492 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:48,975][24592] Fps is (10 sec: 40960.1, 60 sec: 37683.3, 300 sec: 38960.8). Total num frames: 2838470656. Throughput: 0: 9419.5. Samples: 459605556. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:48,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:50,580][626795] Updated weights for policy 0, policy_version 346502 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:52,592][626795] Updated weights for policy 0, policy_version 346512 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:53,975][24592] Fps is (10 sec: 40151.4, 60 sec: 38092.8, 300 sec: 38905.1). Total num frames: 2838667264. Throughput: 0: 9808.0. Samples: 459665658. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:53,977][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:54,887][626795] Updated weights for policy 0, policy_version 346522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:56,887][626795] Updated weights for policy 0, policy_version 346532 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:58,975][24592] Fps is (10 sec: 39321.3, 60 sec: 38229.3, 300 sec: 38849.5). Total num frames: 2838863872. Throughput: 0: 9910.1. Samples: 459695640. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:31:58,976][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:31:59,109][626795] Updated weights for policy 0, policy_version 346542 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:01,052][626795] Updated weights for policy 0, policy_version 346552 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:02,943][626795] Updated weights for policy 0, policy_version 346562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:03,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38229.3, 300 sec: 38794.0). Total num frames: 2839068672. Throughput: 0: 9922.8. Samples: 459756414. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:03,976][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:05,130][626795] Updated weights for policy 0, policy_version 346572 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:07,167][626795] Updated weights for policy 0, policy_version 346582 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:08,976][24592] Fps is (10 sec: 40138.4, 60 sec: 39461.8, 300 sec: 38908.4). Total num frames: 2839265280. Throughput: 0: 9938.4. Samples: 459814758. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:08,979][24592] Avg episode reward: [(0, '4.413')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:09,407][626795] Updated weights for policy 0, policy_version 346592 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:12,331][626795] Updated weights for policy 0, policy_version 346602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:13,975][24592] Fps is (10 sec: 35225.7, 60 sec: 39048.5, 300 sec: 38710.7). Total num frames: 2839420928. Throughput: 0: 9772.7. Samples: 459837210. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:13,976][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:14,413][626795] Updated weights for policy 0, policy_version 346612 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:16,528][626795] Updated weights for policy 0, policy_version 346622 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:18,680][626795] Updated weights for policy 0, policy_version 346632 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:18,979][24592] Fps is (10 sec: 35215.5, 60 sec: 38910.2, 300 sec: 38682.5). Total num frames: 2839617536. Throughput: 0: 9617.0. Samples: 459893034. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:18,980][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:20,855][626795] Updated weights for policy 0, policy_version 346642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:22,912][626795] Updated weights for policy 0, policy_version 346652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:23,975][24592] Fps is (10 sec: 39321.7, 60 sec: 38912.1, 300 sec: 38627.4). Total num frames: 2839814144. Throughput: 0: 9706.3. Samples: 459951678. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:23,977][24592] Avg episode reward: [(0, '4.924')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:24,921][626795] Updated weights for policy 0, policy_version 346662 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:27,012][626795] Updated weights for policy 0, policy_version 346672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:28,976][24592] Fps is (10 sec: 39334.6, 60 sec: 38775.3, 300 sec: 38544.0). Total num frames: 2840010752. Throughput: 0: 9708.5. Samples: 459981228. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:28,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:29,194][626795] Updated weights for policy 0, policy_version 346682 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:31,370][626795] Updated weights for policy 0, policy_version 346692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:33,452][626795] Updated weights for policy 0, policy_version 346702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:33,975][24592] Fps is (10 sec: 38502.3, 60 sec: 38365.9, 300 sec: 38571.8). Total num frames: 2840199168. Throughput: 0: 9618.1. Samples: 460038372. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:33,977][24592] Avg episode reward: [(0, '4.804')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:35,479][626795] Updated weights for policy 0, policy_version 346712 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:37,482][626795] Updated weights for policy 0, policy_version 346722 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:38,975][24592] Fps is (10 sec: 38503.1, 60 sec: 38912.0, 300 sec: 38655.2). Total num frames: 2840395776. Throughput: 0: 9624.1. Samples: 460098744. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:38,976][24592] Avg episode reward: [(0, '4.842')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:39,771][626795] Updated weights for policy 0, policy_version 346732 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:41,844][626795] Updated weights for policy 0, policy_version 346742 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:44,254][24592] Fps is (10 sec: 35863.5, 60 sec: 38190.0, 300 sec: 38646.4). Total num frames: 2840567808. Throughput: 0: 9494.6. Samples: 460125546. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:44,257][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:44,883][626795] Updated weights for policy 0, policy_version 346752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:46,902][626795] Updated weights for policy 0, policy_version 346762 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:48,809][626795] Updated weights for policy 0, policy_version 346772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:48,975][24592] Fps is (10 sec: 36044.6, 60 sec: 38092.7, 300 sec: 38627.4). Total num frames: 2840756224. Throughput: 0: 9350.5. Samples: 460177188. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:48,977][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:50,789][626795] Updated weights for policy 0, policy_version 346782 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:52,898][626795] Updated weights for policy 0, policy_version 346792 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:53,978][24592] Fps is (10 sec: 40441.1, 60 sec: 38228.0, 300 sec: 38654.9). Total num frames: 2840961024. Throughput: 0: 9411.6. Samples: 460238292. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:53,982][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:55,012][626795] Updated weights for policy 0, policy_version 346802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:56,956][626795] Updated weights for policy 0, policy_version 346812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:32:58,974][626795] Updated weights for policy 0, policy_version 346822 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:58,975][24592] Fps is (10 sec: 40960.4, 60 sec: 38365.9, 300 sec: 38655.1). Total num frames: 2841165824. Throughput: 0: 9576.5. Samples: 460268154. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:32:58,976][24592] Avg episode reward: [(0, '4.875')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:01,215][626795] Updated weights for policy 0, policy_version 346832 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:03,299][626795] Updated weights for policy 0, policy_version 346842 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:03,975][24592] Fps is (10 sec: 39329.7, 60 sec: 38092.8, 300 sec: 38599.6). Total num frames: 2841354240. Throughput: 0: 9633.1. Samples: 460326492. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:03,979][24592] Avg episode reward: [(0, '4.633')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:03,983][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000346845_2841354240.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:04,119][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000345722_2832154624.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:05,440][626795] Updated weights for policy 0, policy_version 346852 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:07,564][626795] Updated weights for policy 0, policy_version 346862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:08,975][24592] Fps is (10 sec: 38502.5, 60 sec: 38093.2, 300 sec: 38571.8). Total num frames: 2841550848. Throughput: 0: 9621.7. Samples: 460384656. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:08,976][24592] Avg episode reward: [(0, '4.624')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:09,656][626795] Updated weights for policy 0, policy_version 346872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:11,769][626795] Updated weights for policy 0, policy_version 346882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:13,707][626795] Updated weights for policy 0, policy_version 346892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:13,975][24592] Fps is (10 sec: 39321.5, 60 sec: 38775.4, 300 sec: 38544.0). Total num frames: 2841747456. Throughput: 0: 9613.4. Samples: 460413828. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:13,976][24592] Avg episode reward: [(0, '4.557')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:15,618][626795] Updated weights for policy 0, policy_version 346902 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:18,308][626795] Updated weights for policy 0, policy_version 346912 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:18,982][24592] Fps is (10 sec: 36838.6, 60 sec: 38363.7, 300 sec: 38543.2). Total num frames: 2841919488. Throughput: 0: 9569.5. Samples: 460469064. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:18,985][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:20,492][626795] Updated weights for policy 0, policy_version 346922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:22,471][626795] Updated weights for policy 0, policy_version 346932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:23,975][24592] Fps is (10 sec: 37683.6, 60 sec: 38502.4, 300 sec: 38516.3). Total num frames: 2842124288. Throughput: 0: 9576.1. Samples: 460529670. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:23,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:24,478][626795] Updated weights for policy 0, policy_version 346942 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:26,668][626795] Updated weights for policy 0, policy_version 346952 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:28,693][626795] Updated weights for policy 0, policy_version 346962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:28,975][24592] Fps is (10 sec: 40168.4, 60 sec: 38502.5, 300 sec: 38488.7). Total num frames: 2842320896. Throughput: 0: 9700.4. Samples: 460559358. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:28,976][24592] Avg episode reward: [(0, '4.854')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:30,777][626795] Updated weights for policy 0, policy_version 346972 (0.0038)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:32,890][626795] Updated weights for policy 0, policy_version 346982 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:33,975][24592] Fps is (10 sec: 39321.6, 60 sec: 38638.9, 300 sec: 38460.7). Total num frames: 2842517504. Throughput: 0: 9781.8. Samples: 460617366. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:33,977][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:35,007][626795] Updated weights for policy 0, policy_version 346992 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:37,075][626795] Updated weights for policy 0, policy_version 347002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:38,975][24592] Fps is (10 sec: 39321.4, 60 sec: 38638.9, 300 sec: 38460.7). Total num frames: 2842714112. Throughput: 0: 9755.8. Samples: 460677282. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:38,978][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:39,162][626795] Updated weights for policy 0, policy_version 347012 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:41,286][626795] Updated weights for policy 0, policy_version 347022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:43,147][626795] Updated weights for policy 0, policy_version 347032 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:43,976][24592] Fps is (10 sec: 39321.3, 60 sec: 39230.9, 300 sec: 38433.0). Total num frames: 2842910720. Throughput: 0: 9724.3. Samples: 460705746. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:43,976][24592] Avg episode reward: [(0, '4.530')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:45,473][626795] Updated weights for policy 0, policy_version 347042 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:47,470][626795] Updated weights for policy 0, policy_version 347052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:48,975][24592] Fps is (10 sec: 39321.9, 60 sec: 39185.1, 300 sec: 38516.3). Total num frames: 2843107328. Throughput: 0: 9757.2. Samples: 460765566. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:48,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:50,260][626795] Updated weights for policy 0, policy_version 347062 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:52,162][626795] Updated weights for policy 0, policy_version 347072 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:53,975][24592] Fps is (10 sec: 36864.3, 60 sec: 38640.3, 300 sec: 38405.2). Total num frames: 2843279360. Throughput: 0: 9668.3. Samples: 460819728. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:53,976][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:54,186][626795] Updated weights for policy 0, policy_version 347082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:56,123][626795] Updated weights for policy 0, policy_version 347092 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:33:58,086][626795] Updated weights for policy 0, policy_version 347102 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:58,976][24592] Fps is (10 sec: 38500.1, 60 sec: 38775.1, 300 sec: 38462.0). Total num frames: 2843492352. Throughput: 0: 9688.6. Samples: 460849818. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:33:58,978][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:00,295][626795] Updated weights for policy 0, policy_version 347112 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:02,353][626795] Updated weights for policy 0, policy_version 347122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:03,976][24592] Fps is (10 sec: 40138.7, 60 sec: 38775.2, 300 sec: 38405.2). Total num frames: 2843680768. Throughput: 0: 9789.4. Samples: 460909524. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:03,979][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:04,574][626795] Updated weights for policy 0, policy_version 347132 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:06,736][626795] Updated weights for policy 0, policy_version 347142 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:08,777][626795] Updated weights for policy 0, policy_version 347152 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:08,976][24592] Fps is (10 sec: 37683.9, 60 sec: 38638.7, 300 sec: 38349.6). Total num frames: 2843869184. Throughput: 0: 9699.5. Samples: 460966152. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:08,977][24592] Avg episode reward: [(0, '4.515')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:11,027][626795] Updated weights for policy 0, policy_version 347162 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:13,126][626795] Updated weights for policy 0, policy_version 347172 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:13,975][24592] Fps is (10 sec: 37684.8, 60 sec: 38502.4, 300 sec: 38294.1). Total num frames: 2844057600. Throughput: 0: 9688.0. Samples: 460995318. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:13,977][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:15,462][626795] Updated weights for policy 0, policy_version 347182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:17,526][626795] Updated weights for policy 0, policy_version 347192 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:18,976][24592] Fps is (10 sec: 38502.7, 60 sec: 38916.3, 300 sec: 38294.1). Total num frames: 2844254208. Throughput: 0: 9650.2. Samples: 461051628. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:18,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:19,626][626795] Updated weights for policy 0, policy_version 347202 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:21,659][626795] Updated weights for policy 0, policy_version 347212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:23,975][24592] Fps is (10 sec: 36864.3, 60 sec: 38365.9, 300 sec: 38266.4). Total num frames: 2844426240. Throughput: 0: 9480.4. Samples: 461103900. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:23,976][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:24,522][626795] Updated weights for policy 0, policy_version 347222 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:27,056][626795] Updated weights for policy 0, policy_version 347232 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:28,975][24592] Fps is (10 sec: 33588.0, 60 sec: 37819.7, 300 sec: 38127.5). Total num frames: 2844590080. Throughput: 0: 9378.4. Samples: 461127774. Policy #0 lag: (min: 0.0, avg: 2.2, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:28,982][24592] Avg episode reward: [(0, '5.060')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:29,396][626795] Updated weights for policy 0, policy_version 347242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:31,877][626795] Updated weights for policy 0, policy_version 347252 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:33,979][24592] Fps is (10 sec: 31121.4, 60 sec: 36998.9, 300 sec: 37932.8). Total num frames: 2844737536. Throughput: 0: 9148.4. Samples: 461177268. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:33,981][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:35,088][626795] Updated weights for policy 0, policy_version 347262 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:38,578][626795] Updated weights for policy 0, policy_version 347272 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:38,975][24592] Fps is (10 sec: 27033.7, 60 sec: 35771.7, 300 sec: 37766.5). Total num frames: 2844860416. Throughput: 0: 8763.7. Samples: 461214096. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:38,977][24592] Avg episode reward: [(0, '4.351')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:42,118][626795] Updated weights for policy 0, policy_version 347282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:43,977][24592] Fps is (10 sec: 24581.7, 60 sec: 34542.8, 300 sec: 37516.5). Total num frames: 2844983296. Throughput: 0: 8499.4. Samples: 461232288. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:43,980][24592] Avg episode reward: [(0, '4.464')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:44,706][626795] Updated weights for policy 0, policy_version 347292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:46,889][626795] Updated weights for policy 0, policy_version 347302 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:48,976][24592] Fps is (10 sec: 30309.7, 60 sec: 34269.7, 300 sec: 37433.5). Total num frames: 2845163520. Throughput: 0: 8266.0. Samples: 461281494. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:48,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:49,392][626795] Updated weights for policy 0, policy_version 347312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:51,476][626795] Updated weights for policy 0, policy_version 347322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:53,568][626795] Updated weights for policy 0, policy_version 347332 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:53,975][24592] Fps is (10 sec: 36865.2, 60 sec: 34542.9, 300 sec: 37494.3). Total num frames: 2845351936. Throughput: 0: 8251.0. Samples: 461337444. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:53,976][24592] Avg episode reward: [(0, '4.460')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:56,467][626795] Updated weights for policy 0, policy_version 347342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:34:58,568][626795] Updated weights for policy 0, policy_version 347352 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:58,975][24592] Fps is (10 sec: 35226.8, 60 sec: 33724.1, 300 sec: 37350.0). Total num frames: 2845515776. Throughput: 0: 8085.6. Samples: 461359170. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:34:58,977][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:00,766][626795] Updated weights for policy 0, policy_version 347362 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:02,827][626795] Updated weights for policy 0, policy_version 347372 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:03,978][24592] Fps is (10 sec: 35217.8, 60 sec: 33722.8, 300 sec: 37266.4). Total num frames: 2845704192. Throughput: 0: 8096.3. Samples: 461415978. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:03,978][24592] Avg episode reward: [(0, '4.431')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000347376_2845704192.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:04,190][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000346286_2836774912.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:05,420][626795] Updated weights for policy 0, policy_version 347382 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:07,609][626795] Updated weights for policy 0, policy_version 347392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:08,975][24592] Fps is (10 sec: 36044.7, 60 sec: 33450.9, 300 sec: 37266.7). Total num frames: 2845876224. Throughput: 0: 8108.9. Samples: 461468802. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:08,977][24592] Avg episode reward: [(0, '4.476')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:09,883][626795] Updated weights for policy 0, policy_version 347402 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:11,951][626795] Updated weights for policy 0, policy_version 347412 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:13,975][24592] Fps is (10 sec: 36872.0, 60 sec: 33587.2, 300 sec: 37294.5). Total num frames: 2846072832. Throughput: 0: 8209.1. Samples: 461497182. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:13,977][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:14,296][626795] Updated weights for policy 0, policy_version 347422 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:17,443][626795] Updated weights for policy 0, policy_version 347432 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:18,975][24592] Fps is (10 sec: 31129.2, 60 sec: 32222.0, 300 sec: 37016.7). Total num frames: 2846187520. Throughput: 0: 8111.4. Samples: 461542260. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:18,988][24592] Avg episode reward: [(0, '4.635')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:20,930][626795] Updated weights for policy 0, policy_version 347442 (0.0043)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:23,165][626795] Updated weights for policy 0, policy_version 347452 (0.0030)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:23,977][24592] Fps is (10 sec: 27029.0, 60 sec: 31947.9, 300 sec: 36849.9). Total num frames: 2846343168. Throughput: 0: 8266.2. Samples: 461586090. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:23,980][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:25,835][626795] Updated weights for policy 0, policy_version 347462 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:28,141][626795] Updated weights for policy 0, policy_version 347472 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:29,228][24592] Fps is (10 sec: 30362.6, 60 sec: 31678.9, 300 sec: 36846.3). Total num frames: 2846498816. Throughput: 0: 8349.7. Samples: 461610132. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:29,233][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:31,402][626795] Updated weights for policy 0, policy_version 347482 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:33,522][626795] Updated weights for policy 0, policy_version 347492 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:33,976][24592] Fps is (10 sec: 32773.1, 60 sec: 32223.2, 300 sec: 36794.6). Total num frames: 2846670848. Throughput: 0: 8325.4. Samples: 461656134. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:33,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:35,689][626795] Updated weights for policy 0, policy_version 347502 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:37,582][626795] Updated weights for policy 0, policy_version 347512 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:38,975][24592] Fps is (10 sec: 38659.9, 60 sec: 33587.2, 300 sec: 36822.7). Total num frames: 2846875648. Throughput: 0: 8428.9. Samples: 461716746. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:38,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:39,532][626795] Updated weights for policy 0, policy_version 347522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:41,543][626795] Updated weights for policy 0, policy_version 347532 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:43,450][626795] Updated weights for policy 0, policy_version 347542 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:43,976][24592] Fps is (10 sec: 40958.7, 60 sec: 34952.5, 300 sec: 36850.1). Total num frames: 2847080448. Throughput: 0: 8631.8. Samples: 461747604. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:43,977][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:45,525][626795] Updated weights for policy 0, policy_version 347552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:47,322][626795] Updated weights for policy 0, policy_version 347562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:48,976][24592] Fps is (10 sec: 40140.4, 60 sec: 35225.7, 300 sec: 36933.4). Total num frames: 2847277056. Throughput: 0: 8752.8. Samples: 461809836. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:48,979][24592] Avg episode reward: [(0, '4.849')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:49,794][626795] Updated weights for policy 0, policy_version 347572 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:51,830][626795] Updated weights for policy 0, policy_version 347582 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:53,781][626795] Updated weights for policy 0, policy_version 347592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:53,975][24592] Fps is (10 sec: 39323.5, 60 sec: 35362.1, 300 sec: 36961.2). Total num frames: 2847473664. Throughput: 0: 8868.5. Samples: 461867886. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:53,977][24592] Avg episode reward: [(0, '4.781')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:55,926][626795] Updated weights for policy 0, policy_version 347602 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:57,943][626795] Updated weights for policy 0, policy_version 347612 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:58,976][24592] Fps is (10 sec: 40138.3, 60 sec: 36044.3, 300 sec: 36961.1). Total num frames: 2847678464. Throughput: 0: 8880.0. Samples: 461896788. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:35:58,986][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:35:59,927][626795] Updated weights for policy 0, policy_version 347622 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:03,317][626795] Updated weights for policy 0, policy_version 347632 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:03,976][24592] Fps is (10 sec: 34406.0, 60 sec: 35226.8, 300 sec: 37017.5). Total num frames: 2847817728. Throughput: 0: 8964.9. Samples: 461945682. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:03,978][24592] Avg episode reward: [(0, '4.922')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:05,516][626795] Updated weights for policy 0, policy_version 347642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:07,538][626795] Updated weights for policy 0, policy_version 347652 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:08,975][24592] Fps is (10 sec: 34409.3, 60 sec: 35771.7, 300 sec: 37100.0). Total num frames: 2848022528. Throughput: 0: 9284.0. Samples: 462003852. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:08,977][24592] Avg episode reward: [(0, '4.825')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:09,512][626795] Updated weights for policy 0, policy_version 347662 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:11,516][626795] Updated weights for policy 0, policy_version 347672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:13,334][626795] Updated weights for policy 0, policy_version 347682 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:13,982][24592] Fps is (10 sec: 40933.6, 60 sec: 35904.4, 300 sec: 37099.3). Total num frames: 2848227328. Throughput: 0: 9491.0. Samples: 462034890. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:13,983][24592] Avg episode reward: [(0, '4.610')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:15,395][626795] Updated weights for policy 0, policy_version 347692 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:17,263][626795] Updated weights for policy 0, policy_version 347702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 37546.8, 300 sec: 37155.6). Total num frames: 2848440320. Throughput: 0: 9822.7. Samples: 462098154. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:18,977][24592] Avg episode reward: [(0, '4.907')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:19,337][626795] Updated weights for policy 0, policy_version 347712 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:21,164][626795] Updated weights for policy 0, policy_version 347722 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:23,192][626795] Updated weights for policy 0, policy_version 347732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:23,975][24592] Fps is (10 sec: 42626.4, 60 sec: 38503.5, 300 sec: 37183.4). Total num frames: 2848653312. Throughput: 0: 9867.9. Samples: 462160800. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:23,977][24592] Avg episode reward: [(0, '4.705')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:25,112][626795] Updated weights for policy 0, policy_version 347742 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:27,044][626795] Updated weights for policy 0, policy_version 347752 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:28,880][626795] Updated weights for policy 0, policy_version 347762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:28,975][24592] Fps is (10 sec: 42597.8, 60 sec: 39625.0, 300 sec: 37183.3). Total num frames: 2848866304. Throughput: 0: 9895.5. Samples: 462192900. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:28,978][24592] Avg episode reward: [(0, '4.485')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:30,995][626795] Updated weights for policy 0, policy_version 347772 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:32,847][626795] Updated weights for policy 0, policy_version 347782 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:33,977][24592] Fps is (10 sec: 41772.3, 60 sec: 40003.3, 300 sec: 37322.0). Total num frames: 2849071104. Throughput: 0: 9918.8. Samples: 462256194. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:33,978][24592] Avg episode reward: [(0, '4.654')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:35,675][626795] Updated weights for policy 0, policy_version 347792 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:37,650][626795] Updated weights for policy 0, policy_version 347802 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:38,975][24592] Fps is (10 sec: 38502.9, 60 sec: 39594.7, 300 sec: 37239.2). Total num frames: 2849251328. Throughput: 0: 9837.7. Samples: 462310584. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:38,976][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:39,573][626795] Updated weights for policy 0, policy_version 347812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:41,455][626795] Updated weights for policy 0, policy_version 347822 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:43,423][626795] Updated weights for policy 0, policy_version 347832 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:43,975][24592] Fps is (10 sec: 38508.9, 60 sec: 39595.0, 300 sec: 37238.9). Total num frames: 2849456128. Throughput: 0: 9900.3. Samples: 462342294. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:43,978][24592] Avg episode reward: [(0, '4.861')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:45,350][626795] Updated weights for policy 0, policy_version 347842 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:47,315][626795] Updated weights for policy 0, policy_version 347852 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:48,975][24592] Fps is (10 sec: 41779.0, 60 sec: 39867.8, 300 sec: 37294.4). Total num frames: 2849669120. Throughput: 0: 10202.9. Samples: 462404814. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:48,977][24592] Avg episode reward: [(0, '4.512')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:49,220][626795] Updated weights for policy 0, policy_version 347862 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:51,162][626795] Updated weights for policy 0, policy_version 347872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:53,083][626795] Updated weights for policy 0, policy_version 347882 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:53,975][24592] Fps is (10 sec: 42597.9, 60 sec: 40140.7, 300 sec: 37350.0). Total num frames: 2849882112. Throughput: 0: 10325.2. Samples: 462468486. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:53,976][24592] Avg episode reward: [(0, '4.510')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:55,053][626795] Updated weights for policy 0, policy_version 347892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:57,044][626795] Updated weights for policy 0, policy_version 347902 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:36:58,791][626795] Updated weights for policy 0, policy_version 347912 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:58,975][24592] Fps is (10 sec: 42598.7, 60 sec: 40277.9, 300 sec: 37377.7). Total num frames: 2850095104. Throughput: 0: 10347.1. Samples: 462500442. Policy #0 lag: (min: 0.0, avg: 2.0, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:36:58,977][24592] Avg episode reward: [(0, '4.558')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:00,825][626795] Updated weights for policy 0, policy_version 347922 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:02,641][626795] Updated weights for policy 0, policy_version 347932 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:03,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.2, 300 sec: 37433.3). Total num frames: 2850308096. Throughput: 0: 10369.6. Samples: 462564786. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:03,978][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000347938_2850308096.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:04,165][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000346845_2841354240.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:04,775][626795] Updated weights for policy 0, policy_version 347942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:06,654][626795] Updated weights for policy 0, policy_version 347952 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:08,976][24592] Fps is (10 sec: 39319.9, 60 sec: 41096.3, 300 sec: 37516.5). Total num frames: 2850488320. Throughput: 0: 10174.6. Samples: 462618660. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:08,978][24592] Avg episode reward: [(0, '4.802')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:09,457][626795] Updated weights for policy 0, policy_version 347962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:11,395][626795] Updated weights for policy 0, policy_version 347972 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:13,286][626795] Updated weights for policy 0, policy_version 347982 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:13,975][24592] Fps is (10 sec: 38502.9, 60 sec: 41101.1, 300 sec: 37544.8). Total num frames: 2850693120. Throughput: 0: 10161.0. Samples: 462650142. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:13,976][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:15,221][626795] Updated weights for policy 0, policy_version 347992 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:17,095][626795] Updated weights for policy 0, policy_version 348002 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:18,976][24592] Fps is (10 sec: 41779.5, 60 sec: 41096.3, 300 sec: 37599.8). Total num frames: 2850906112. Throughput: 0: 10172.3. Samples: 462713934. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:18,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:19,000][626795] Updated weights for policy 0, policy_version 348012 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:21,044][626795] Updated weights for policy 0, policy_version 348022 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:22,827][626795] Updated weights for policy 0, policy_version 348032 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:23,975][24592] Fps is (10 sec: 42598.0, 60 sec: 41096.5, 300 sec: 37655.5). Total num frames: 2851119104. Throughput: 0: 10394.8. Samples: 462778350. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:23,978][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:24,824][626795] Updated weights for policy 0, policy_version 348042 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:26,722][626795] Updated weights for policy 0, policy_version 348052 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:28,604][626795] Updated weights for policy 0, policy_version 348062 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:28,975][24592] Fps is (10 sec: 42599.7, 60 sec: 41096.6, 300 sec: 37738.7). Total num frames: 2851332096. Throughput: 0: 10407.5. Samples: 462810630. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:28,976][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:30,629][626795] Updated weights for policy 0, policy_version 348072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:32,537][626795] Updated weights for policy 0, policy_version 348082 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:33,977][24592] Fps is (10 sec: 42590.9, 60 sec: 41233.0, 300 sec: 37794.1). Total num frames: 2851545088. Throughput: 0: 10418.0. Samples: 462873642. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:33,979][24592] Avg episode reward: [(0, '4.483')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:34,589][626795] Updated weights for policy 0, policy_version 348092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:36,436][626795] Updated weights for policy 0, policy_version 348102 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:38,410][626795] Updated weights for policy 0, policy_version 348112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:38,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41642.6, 300 sec: 37941.2). Total num frames: 2851749888. Throughput: 0: 10415.3. Samples: 462937176. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:38,977][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:40,460][626795] Updated weights for policy 0, policy_version 348122 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:43,057][626795] Updated weights for policy 0, policy_version 348132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:43,975][24592] Fps is (10 sec: 38509.2, 60 sec: 41233.0, 300 sec: 37877.6). Total num frames: 2851930112. Throughput: 0: 10202.6. Samples: 462959562. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:43,977][24592] Avg episode reward: [(0, '4.495')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:45,013][626795] Updated weights for policy 0, policy_version 348142 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:46,957][626795] Updated weights for policy 0, policy_version 348152 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:48,975][24592] Fps is (10 sec: 38502.5, 60 sec: 41096.6, 300 sec: 37877.9). Total num frames: 2852134912. Throughput: 0: 10185.2. Samples: 463023120. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:48,976][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:48,988][626795] Updated weights for policy 0, policy_version 348162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:50,904][626795] Updated weights for policy 0, policy_version 348172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:52,750][626795] Updated weights for policy 0, policy_version 348182 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:53,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41096.6, 300 sec: 37905.4). Total num frames: 2852347904. Throughput: 0: 10395.8. Samples: 463086468. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:53,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:54,782][626795] Updated weights for policy 0, policy_version 348192 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:56,714][626795] Updated weights for policy 0, policy_version 348202 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:37:58,556][626795] Updated weights for policy 0, policy_version 348212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:58,975][24592] Fps is (10 sec: 43417.1, 60 sec: 41233.0, 300 sec: 38016.4). Total num frames: 2852569088. Throughput: 0: 10412.0. Samples: 463118682. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:37:58,977][24592] Avg episode reward: [(0, '5.174')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:00,356][626795] Updated weights for policy 0, policy_version 348222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:02,293][626795] Updated weights for policy 0, policy_version 348232 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:03,975][24592] Fps is (10 sec: 44236.8, 60 sec: 41369.6, 300 sec: 38099.7). Total num frames: 2852790272. Throughput: 0: 10448.3. Samples: 463184106. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:03,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:04,240][626795] Updated weights for policy 0, policy_version 348242 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:06,052][626795] Updated weights for policy 0, policy_version 348252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:08,025][626795] Updated weights for policy 0, policy_version 348262 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:08,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41779.5, 300 sec: 38127.5). Total num frames: 2852995072. Throughput: 0: 10429.5. Samples: 463247676. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:08,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:09,990][626795] Updated weights for policy 0, policy_version 348272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:12,027][626795] Updated weights for policy 0, policy_version 348282 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:14,324][24592] Fps is (10 sec: 38788.3, 60 sec: 41402.0, 300 sec: 38166.6). Total num frames: 2853191680. Throughput: 0: 10315.7. Samples: 463278432. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:14,326][24592] Avg episode reward: [(0, '4.629')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:14,649][626795] Updated weights for policy 0, policy_version 348292 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:16,659][626795] Updated weights for policy 0, policy_version 348302 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:18,598][626795] Updated weights for policy 0, policy_version 348312 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:18,975][24592] Fps is (10 sec: 39321.5, 60 sec: 41369.8, 300 sec: 38183.0). Total num frames: 2853388288. Throughput: 0: 10232.1. Samples: 463334070. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:18,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:20,591][626795] Updated weights for policy 0, policy_version 348322 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:22,441][626795] Updated weights for policy 0, policy_version 348332 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:23,975][24592] Fps is (10 sec: 42439.7, 60 sec: 41369.6, 300 sec: 38238.6). Total num frames: 2853601280. Throughput: 0: 10231.1. Samples: 463397574. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:23,977][24592] Avg episode reward: [(0, '4.956')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:24,370][626795] Updated weights for policy 0, policy_version 348342 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:26,125][626795] Updated weights for policy 0, policy_version 348352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:28,024][626795] Updated weights for policy 0, policy_version 348362 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:28,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41369.6, 300 sec: 38294.1). Total num frames: 2853814272. Throughput: 0: 10460.1. Samples: 463430268. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:28,977][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:29,967][626795] Updated weights for policy 0, policy_version 348372 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:31,873][626795] Updated weights for policy 0, policy_version 348382 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:32,963][626772] Signal inference workers to stop experience collection... (5950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:32,965][626772] Signal inference workers to resume experience collection... (5950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:32,989][626795] InferenceWorker_p0-w0: stopping experience collection (5950 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:32,991][626795] InferenceWorker_p0-w0: resuming experience collection (5950 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:33,823][626795] Updated weights for policy 0, policy_version 348392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:33,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41370.8, 300 sec: 38349.7). Total num frames: 2854027264. Throughput: 0: 10483.7. Samples: 463494888. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:33,978][24592] Avg episode reward: [(0, '4.622')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:35,922][626795] Updated weights for policy 0, policy_version 348402 (0.0032)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:37,756][626795] Updated weights for policy 0, policy_version 348412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:38,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41506.1, 300 sec: 38405.2). Total num frames: 2854240256. Throughput: 0: 10483.5. Samples: 463558224. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:38,976][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:39,662][626795] Updated weights for policy 0, policy_version 348422 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:41,605][626795] Updated weights for policy 0, policy_version 348432 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:43,520][626795] Updated weights for policy 0, policy_version 348442 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:43,975][24592] Fps is (10 sec: 41779.7, 60 sec: 41915.8, 300 sec: 38433.0). Total num frames: 2854445056. Throughput: 0: 10468.7. Samples: 463589772. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:43,977][24592] Avg episode reward: [(0, '4.466')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:45,479][626795] Updated weights for policy 0, policy_version 348452 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:48,276][626795] Updated weights for policy 0, policy_version 348462 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:48,975][24592] Fps is (10 sec: 38502.6, 60 sec: 41506.1, 300 sec: 38460.7). Total num frames: 2854625280. Throughput: 0: 10225.6. Samples: 463644258. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:48,977][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:50,349][626795] Updated weights for policy 0, policy_version 348472 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:52,253][626795] Updated weights for policy 0, policy_version 348482 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:53,975][24592] Fps is (10 sec: 38502.4, 60 sec: 41369.6, 300 sec: 38433.1). Total num frames: 2854830080. Throughput: 0: 10200.5. Samples: 463706700. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:53,976][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:54,244][626795] Updated weights for policy 0, policy_version 348492 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:56,202][626795] Updated weights for policy 0, policy_version 348502 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:38:57,998][626795] Updated weights for policy 0, policy_version 348512 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:58,975][24592] Fps is (10 sec: 41779.0, 60 sec: 41233.1, 300 sec: 38516.3). Total num frames: 2855043072. Throughput: 0: 10284.9. Samples: 463737666. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:38:58,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:00,066][626795] Updated weights for policy 0, policy_version 348522 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:01,902][626795] Updated weights for policy 0, policy_version 348532 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:03,816][626795] Updated weights for policy 0, policy_version 348542 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:03,975][24592] Fps is (10 sec: 43417.8, 60 sec: 41233.1, 300 sec: 38627.4). Total num frames: 2855264256. Throughput: 0: 10388.0. Samples: 463801530. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:03,977][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000348543_2855264256.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:04,134][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000347376_2845704192.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:05,800][626795] Updated weights for policy 0, policy_version 348552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:07,640][626795] Updated weights for policy 0, policy_version 348562 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:08,976][24592] Fps is (10 sec: 43417.4, 60 sec: 41369.5, 300 sec: 38710.7). Total num frames: 2855477248. Throughput: 0: 10421.5. Samples: 463866540. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:08,977][24592] Avg episode reward: [(0, '4.634')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:09,544][626795] Updated weights for policy 0, policy_version 348572 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:11,486][626795] Updated weights for policy 0, policy_version 348582 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:13,345][626795] Updated weights for policy 0, policy_version 348592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:13,976][24592] Fps is (10 sec: 42597.4, 60 sec: 41886.0, 300 sec: 38766.2). Total num frames: 2855690240. Throughput: 0: 10411.9. Samples: 463898808. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:13,977][24592] Avg episode reward: [(0, '4.701')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:15,299][626795] Updated weights for policy 0, policy_version 348602 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:17,280][626795] Updated weights for policy 0, policy_version 348612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:18,975][24592] Fps is (10 sec: 41779.8, 60 sec: 41779.2, 300 sec: 38877.3). Total num frames: 2855895040. Throughput: 0: 10382.7. Samples: 463962108. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:18,977][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:19,220][626795] Updated weights for policy 0, policy_version 348622 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:21,976][626795] Updated weights for policy 0, policy_version 348632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:23,784][626795] Updated weights for policy 0, policy_version 348642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:23,982][24592] Fps is (10 sec: 38478.7, 60 sec: 41228.7, 300 sec: 38932.0). Total num frames: 2856075264. Throughput: 0: 10205.9. Samples: 464017554. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:23,982][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:25,822][626795] Updated weights for policy 0, policy_version 348652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:27,726][626795] Updated weights for policy 0, policy_version 348662 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:28,976][24592] Fps is (10 sec: 39318.1, 60 sec: 41232.5, 300 sec: 39155.2). Total num frames: 2856288256. Throughput: 0: 10198.5. Samples: 464048712. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:28,977][24592] Avg episode reward: [(0, '4.818')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:29,767][626795] Updated weights for policy 0, policy_version 348672 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:31,667][626795] Updated weights for policy 0, policy_version 348682 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:33,590][626795] Updated weights for policy 0, policy_version 348692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:33,976][24592] Fps is (10 sec: 42623.5, 60 sec: 41232.8, 300 sec: 39460.4). Total num frames: 2856501248. Throughput: 0: 10390.2. Samples: 464111820. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:33,977][24592] Avg episode reward: [(0, '4.469')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:35,550][626795] Updated weights for policy 0, policy_version 348702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:37,554][626795] Updated weights for policy 0, policy_version 348712 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:38,975][24592] Fps is (10 sec: 41782.8, 60 sec: 41096.6, 300 sec: 39738.2). Total num frames: 2856706048. Throughput: 0: 10420.3. Samples: 464175612. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:38,976][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:39,276][626795] Updated weights for policy 0, policy_version 348722 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:41,235][626795] Updated weights for policy 0, policy_version 348732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:43,117][626795] Updated weights for policy 0, policy_version 348742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:43,976][24592] Fps is (10 sec: 42598.9, 60 sec: 41369.4, 300 sec: 39877.0). Total num frames: 2856927232. Throughput: 0: 10459.5. Samples: 464208348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:43,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:45,162][626795] Updated weights for policy 0, policy_version 348752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:46,945][626795] Updated weights for policy 0, policy_version 348762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:48,931][626795] Updated weights for policy 0, policy_version 348772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:48,976][24592] Fps is (10 sec: 43416.0, 60 sec: 41915.5, 300 sec: 39960.2). Total num frames: 2857140224. Throughput: 0: 10445.8. Samples: 464271594. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:48,978][24592] Avg episode reward: [(0, '4.707')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:50,917][626795] Updated weights for policy 0, policy_version 348782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:53,657][626795] Updated weights for policy 0, policy_version 348792 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:53,976][24592] Fps is (10 sec: 38502.5, 60 sec: 41369.4, 300 sec: 39988.0). Total num frames: 2857312256. Throughput: 0: 10236.7. Samples: 464327196. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:53,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:55,519][626795] Updated weights for policy 0, policy_version 348802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:57,592][626795] Updated weights for policy 0, policy_version 348812 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:58,975][24592] Fps is (10 sec: 37684.4, 60 sec: 41233.1, 300 sec: 40043.9). Total num frames: 2857517056. Throughput: 0: 10202.2. Samples: 464357904. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:39:58,986][24592] Avg episode reward: [(0, '4.704')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:39:59,551][626795] Updated weights for policy 0, policy_version 348822 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:01,507][626795] Updated weights for policy 0, policy_version 348832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:03,328][626795] Updated weights for policy 0, policy_version 348842 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:03,975][24592] Fps is (10 sec: 41780.8, 60 sec: 41096.5, 300 sec: 40182.5). Total num frames: 2857730048. Throughput: 0: 10205.2. Samples: 464421342. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:03,976][24592] Avg episode reward: [(0, '4.751')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:05,383][626795] Updated weights for policy 0, policy_version 348852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:07,221][626795] Updated weights for policy 0, policy_version 348862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 41096.6, 300 sec: 40238.0). Total num frames: 2857943040. Throughput: 0: 10369.2. Samples: 464484102. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:08,976][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:09,393][626795] Updated weights for policy 0, policy_version 348872 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:11,144][626795] Updated weights for policy 0, policy_version 348882 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:13,037][626795] Updated weights for policy 0, policy_version 348892 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:13,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41096.7, 300 sec: 40571.2). Total num frames: 2858156032. Throughput: 0: 10392.3. Samples: 464516358. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:13,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:14,870][626795] Updated weights for policy 0, policy_version 348902 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:16,858][626795] Updated weights for policy 0, policy_version 348912 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:18,843][626795] Updated weights for policy 0, policy_version 348922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:18,976][24592] Fps is (10 sec: 43416.0, 60 sec: 41369.3, 300 sec: 40793.6). Total num frames: 2858377216. Throughput: 0: 10422.4. Samples: 464580828. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:18,977][24592] Avg episode reward: [(0, '4.475')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:20,739][626795] Updated weights for policy 0, policy_version 348932 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:22,634][626795] Updated weights for policy 0, policy_version 348942 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:23,976][24592] Fps is (10 sec: 43416.1, 60 sec: 41919.9, 300 sec: 41022.9). Total num frames: 2858590208. Throughput: 0: 10419.7. Samples: 464644500. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:23,978][24592] Avg episode reward: [(0, '4.780')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:24,632][626795] Updated weights for policy 0, policy_version 348952 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:27,403][626795] Updated weights for policy 0, policy_version 348962 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:28,976][24592] Fps is (10 sec: 38501.7, 60 sec: 41233.3, 300 sec: 40987.7). Total num frames: 2858762240. Throughput: 0: 10200.5. Samples: 464667372. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:28,977][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:29,351][626795] Updated weights for policy 0, policy_version 348972 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:31,260][626795] Updated weights for policy 0, policy_version 348982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:33,190][626795] Updated weights for policy 0, policy_version 348992 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:33,975][24592] Fps is (10 sec: 37684.5, 60 sec: 41096.9, 300 sec: 40987.8). Total num frames: 2858967040. Throughput: 0: 10197.3. Samples: 464730468. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:33,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:35,235][626795] Updated weights for policy 0, policy_version 349002 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:37,208][626795] Updated weights for policy 0, policy_version 349012 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:38,976][24592] Fps is (10 sec: 41778.3, 60 sec: 41232.5, 300 sec: 41015.5). Total num frames: 2859180032. Throughput: 0: 10357.5. Samples: 464793288. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:38,978][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:38,985][626795] Updated weights for policy 0, policy_version 349022 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:40,959][626795] Updated weights for policy 0, policy_version 349032 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:42,927][626795] Updated weights for policy 0, policy_version 349042 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:43,976][24592] Fps is (10 sec: 43415.1, 60 sec: 41232.9, 300 sec: 41098.8). Total num frames: 2859401216. Throughput: 0: 10377.3. Samples: 464824890. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:43,977][24592] Avg episode reward: [(0, '4.374')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:44,687][626795] Updated weights for policy 0, policy_version 349052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:46,603][626795] Updated weights for policy 0, policy_version 349062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:48,509][626795] Updated weights for policy 0, policy_version 349072 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:48,975][24592] Fps is (10 sec: 43421.0, 60 sec: 41233.3, 300 sec: 41154.4). Total num frames: 2859614208. Throughput: 0: 10431.7. Samples: 464890770. Policy #0 lag: (min: 0.0, avg: 2.5, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:48,976][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:50,532][626795] Updated weights for policy 0, policy_version 349082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:52,555][626795] Updated weights for policy 0, policy_version 349092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:53,975][24592] Fps is (10 sec: 41781.4, 60 sec: 41779.4, 300 sec: 41154.5). Total num frames: 2859819008. Throughput: 0: 10436.9. Samples: 464953764. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:53,976][24592] Avg episode reward: [(0, '4.428')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:54,422][626795] Updated weights for policy 0, policy_version 349102 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:56,303][626795] Updated weights for policy 0, policy_version 349112 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:40:58,333][626795] Updated weights for policy 0, policy_version 349122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:59,666][24592] Fps is (10 sec: 39079.9, 60 sec: 41438.7, 300 sec: 41307.6). Total num frames: 2860032000. Throughput: 0: 10256.3. Samples: 464984976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:40:59,668][24592] Avg episode reward: [(0, '4.764')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:01,016][626795] Updated weights for policy 0, policy_version 349132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:03,190][626795] Updated weights for policy 0, policy_version 349142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:03,976][24592] Fps is (10 sec: 37681.7, 60 sec: 41096.2, 300 sec: 41265.4). Total num frames: 2860195840. Throughput: 0: 10171.5. Samples: 465038544. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:03,977][24592] Avg episode reward: [(0, '4.580')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:03,984][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000349145_2860195840.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:04,148][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000347938_2850308096.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:05,441][626795] Updated weights for policy 0, policy_version 349152 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:07,274][626795] Updated weights for policy 0, policy_version 349162 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:08,976][24592] Fps is (10 sec: 39598.0, 60 sec: 40959.8, 300 sec: 41266.3). Total num frames: 2860400640. Throughput: 0: 10098.8. Samples: 465098946. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:08,978][24592] Avg episode reward: [(0, '4.879')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:09,256][626795] Updated weights for policy 0, policy_version 349172 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:11,085][626795] Updated weights for policy 0, policy_version 349182 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:13,046][626795] Updated weights for policy 0, policy_version 349192 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:13,975][24592] Fps is (10 sec: 41781.0, 60 sec: 40960.0, 300 sec: 41265.5). Total num frames: 2860613632. Throughput: 0: 10306.9. Samples: 465131178. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:13,977][24592] Avg episode reward: [(0, '4.954')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:14,891][626795] Updated weights for policy 0, policy_version 349202 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:16,857][626795] Updated weights for policy 0, policy_version 349212 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:18,709][626795] Updated weights for policy 0, policy_version 349222 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:18,976][24592] Fps is (10 sec: 43416.6, 60 sec: 40959.9, 300 sec: 41293.2). Total num frames: 2860834816. Throughput: 0: 10340.4. Samples: 465195792. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:18,977][24592] Avg episode reward: [(0, '4.482')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:20,670][626795] Updated weights for policy 0, policy_version 349232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:22,591][626795] Updated weights for policy 0, policy_version 349242 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:23,975][24592] Fps is (10 sec: 43417.5, 60 sec: 40960.2, 300 sec: 41293.2). Total num frames: 2861047808. Throughput: 0: 10384.7. Samples: 465260592. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:23,977][24592] Avg episode reward: [(0, '5.013')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:24,451][626795] Updated weights for policy 0, policy_version 349252 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:26,372][626795] Updated weights for policy 0, policy_version 349262 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:28,294][626795] Updated weights for policy 0, policy_version 349272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:28,975][24592] Fps is (10 sec: 41781.2, 60 sec: 41506.5, 300 sec: 41293.5). Total num frames: 2861252608. Throughput: 0: 10388.4. Samples: 465292362. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:28,976][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:30,301][626795] Updated weights for policy 0, policy_version 349282 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:33,069][626795] Updated weights for policy 0, policy_version 349292 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:33,975][24592] Fps is (10 sec: 38502.2, 60 sec: 41096.5, 300 sec: 41293.2). Total num frames: 2861432832. Throughput: 0: 10161.6. Samples: 465348042. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:33,977][24592] Avg episode reward: [(0, '4.710')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:35,113][626795] Updated weights for policy 0, policy_version 349302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:37,029][626795] Updated weights for policy 0, policy_version 349312 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:38,926][626795] Updated weights for policy 0, policy_version 349322 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:38,976][24592] Fps is (10 sec: 39319.1, 60 sec: 41096.6, 300 sec: 41320.9). Total num frames: 2861645824. Throughput: 0: 10111.6. Samples: 465408792. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:38,978][24592] Avg episode reward: [(0, '4.938')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:40,935][626795] Updated weights for policy 0, policy_version 349332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:42,789][626795] Updated weights for policy 0, policy_version 349342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:43,975][24592] Fps is (10 sec: 42598.5, 60 sec: 40960.4, 300 sec: 41321.0). Total num frames: 2861858816. Throughput: 0: 10289.4. Samples: 465440892. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:43,977][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:44,621][626795] Updated weights for policy 0, policy_version 349352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:46,567][626795] Updated weights for policy 0, policy_version 349362 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:48,428][626795] Updated weights for policy 0, policy_version 349372 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:48,975][24592] Fps is (10 sec: 43420.5, 60 sec: 41096.5, 300 sec: 41348.8). Total num frames: 2862080000. Throughput: 0: 10394.4. Samples: 465506286. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:48,977][24592] Avg episode reward: [(0, '4.800')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:50,308][626795] Updated weights for policy 0, policy_version 349382 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:52,179][626795] Updated weights for policy 0, policy_version 349392 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:53,975][24592] Fps is (10 sec: 43417.4, 60 sec: 41233.0, 300 sec: 41348.8). Total num frames: 2862292992. Throughput: 0: 10496.6. Samples: 465571290. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:53,977][24592] Avg episode reward: [(0, '4.588')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:54,101][626795] Updated weights for policy 0, policy_version 349402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:56,057][626795] Updated weights for policy 0, policy_version 349412 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:57,929][626795] Updated weights for policy 0, policy_version 349422 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:58,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41713.3, 300 sec: 41348.8). Total num frames: 2862505984. Throughput: 0: 10478.7. Samples: 465602718. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:41:58,976][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:41:59,997][626795] Updated weights for policy 0, policy_version 349432 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:02,040][626795] Updated weights for policy 0, policy_version 349442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:03,896][626795] Updated weights for policy 0, policy_version 349452 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:03,976][24592] Fps is (10 sec: 41778.8, 60 sec: 41915.9, 300 sec: 41432.1). Total num frames: 2862710784. Throughput: 0: 10422.6. Samples: 465664806. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:03,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:06,765][626795] Updated weights for policy 0, policy_version 349462 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:08,670][626795] Updated weights for policy 0, policy_version 349472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:08,975][24592] Fps is (10 sec: 37683.0, 60 sec: 41369.7, 300 sec: 41321.0). Total num frames: 2862882816. Throughput: 0: 10188.3. Samples: 465719064. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:08,976][24592] Avg episode reward: [(0, '4.647')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:10,778][626795] Updated weights for policy 0, policy_version 349482 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:12,665][626795] Updated weights for policy 0, policy_version 349492 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:13,975][24592] Fps is (10 sec: 38503.1, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2863095808. Throughput: 0: 10168.9. Samples: 465749964. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:13,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:14,581][626795] Updated weights for policy 0, policy_version 349502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:16,433][626795] Updated weights for policy 0, policy_version 349512 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:18,298][626795] Updated weights for policy 0, policy_version 349522 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:18,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41233.4, 300 sec: 41321.0). Total num frames: 2863308800. Throughput: 0: 10364.0. Samples: 465814422. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:18,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:20,308][626795] Updated weights for policy 0, policy_version 349532 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:22,242][626795] Updated weights for policy 0, policy_version 349542 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:23,970][626795] Updated weights for policy 0, policy_version 349552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:23,975][24592] Fps is (10 sec: 43417.6, 60 sec: 41369.6, 300 sec: 41348.8). Total num frames: 2863529984. Throughput: 0: 10463.5. Samples: 465879642. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:23,976][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:25,916][626795] Updated weights for policy 0, policy_version 349562 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:27,620][626795] Updated weights for policy 0, policy_version 349572 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:28,975][24592] Fps is (10 sec: 43417.5, 60 sec: 41506.1, 300 sec: 41349.0). Total num frames: 2863742976. Throughput: 0: 10475.6. Samples: 465912294. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:28,977][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:29,642][626795] Updated weights for policy 0, policy_version 349582 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:31,568][626795] Updated weights for policy 0, policy_version 349592 (0.0034)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:33,417][626795] Updated weights for policy 0, policy_version 349602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:33,975][24592] Fps is (10 sec: 41779.2, 60 sec: 41915.8, 300 sec: 41348.8). Total num frames: 2863947776. Throughput: 0: 10447.6. Samples: 465976428. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:33,976][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:35,444][626795] Updated weights for policy 0, policy_version 349612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:37,428][626795] Updated weights for policy 0, policy_version 349622 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:38,976][24592] Fps is (10 sec: 38500.7, 60 sec: 41369.7, 300 sec: 41348.7). Total num frames: 2864128000. Throughput: 0: 9697.6. Samples: 466007688. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:38,978][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:40,272][626795] Updated weights for policy 0, policy_version 349632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:42,219][626795] Updated weights for policy 0, policy_version 349642 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:43,975][24592] Fps is (10 sec: 39321.8, 60 sec: 41369.7, 300 sec: 41376.5). Total num frames: 2864340992. Throughput: 0: 10195.3. Samples: 466061508. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:43,978][24592] Avg episode reward: [(0, '4.874')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:44,211][626795] Updated weights for policy 0, policy_version 349652 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:46,108][626795] Updated weights for policy 0, policy_version 349662 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:48,089][626795] Updated weights for policy 0, policy_version 349672 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:48,975][24592] Fps is (10 sec: 41781.5, 60 sec: 41096.5, 300 sec: 41348.8). Total num frames: 2864545792. Throughput: 0: 10204.2. Samples: 466123992. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:48,977][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:50,079][626795] Updated weights for policy 0, policy_version 349682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:51,914][626795] Updated weights for policy 0, policy_version 349692 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:53,839][626795] Updated weights for policy 0, policy_version 349702 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:53,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41096.6, 300 sec: 41321.0). Total num frames: 2864758784. Throughput: 0: 10425.1. Samples: 466188192. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:53,977][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:55,749][626795] Updated weights for policy 0, policy_version 349712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:57,642][626795] Updated weights for policy 0, policy_version 349722 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:58,975][24592] Fps is (10 sec: 43417.3, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2864979968. Throughput: 0: 10465.7. Samples: 466220922. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:42:58,988][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:42:59,517][626795] Updated weights for policy 0, policy_version 349732 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:01,380][626795] Updated weights for policy 0, policy_version 349742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:03,329][626795] Updated weights for policy 0, policy_version 349752 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:03,975][24592] Fps is (10 sec: 43417.9, 60 sec: 41369.7, 300 sec: 41348.8). Total num frames: 2865192960. Throughput: 0: 10477.2. Samples: 466285896. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:03,978][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:03,985][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000349755_2865192960.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:04,154][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000348543_2855264256.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:05,361][626795] Updated weights for policy 0, policy_version 349762 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:07,245][626795] Updated weights for policy 0, policy_version 349772 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:08,976][24592] Fps is (10 sec: 41777.6, 60 sec: 41915.5, 300 sec: 41425.5). Total num frames: 2865397760. Throughput: 0: 10414.8. Samples: 466348314. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:08,978][24592] Avg episode reward: [(0, '4.506')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:09,248][626795] Updated weights for policy 0, policy_version 349782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:11,209][626795] Updated weights for policy 0, policy_version 349792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:13,975][24592] Fps is (10 sec: 37682.6, 60 sec: 41233.0, 300 sec: 41293.2). Total num frames: 2865569792. Throughput: 0: 10302.4. Samples: 466375902. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:13,982][24592] Avg episode reward: [(0, '4.816')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:14,050][626795] Updated weights for policy 0, policy_version 349802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:15,976][626795] Updated weights for policy 0, policy_version 349812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:17,911][626795] Updated weights for policy 0, policy_version 349822 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:18,976][24592] Fps is (10 sec: 38501.6, 60 sec: 41232.7, 300 sec: 41293.2). Total num frames: 2865782784. Throughput: 0: 10152.1. Samples: 466433280. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:18,977][24592] Avg episode reward: [(0, '4.411')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:19,901][626795] Updated weights for policy 0, policy_version 349832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:21,820][626795] Updated weights for policy 0, policy_version 349842 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:23,742][626795] Updated weights for policy 0, policy_version 349852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:23,977][24592] Fps is (10 sec: 42593.4, 60 sec: 41095.6, 300 sec: 41293.0). Total num frames: 2865995776. Throughput: 0: 10873.3. Samples: 466496994. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:23,978][24592] Avg episode reward: [(0, '5.158')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:25,633][626795] Updated weights for policy 0, policy_version 349862 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:27,480][626795] Updated weights for policy 0, policy_version 349872 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:28,976][24592] Fps is (10 sec: 43418.4, 60 sec: 41232.8, 300 sec: 41321.0). Total num frames: 2866216960. Throughput: 0: 10392.4. Samples: 466529172. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:28,977][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:29,580][626795] Updated weights for policy 0, policy_version 349882 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:31,240][626795] Updated weights for policy 0, policy_version 349892 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:33,208][626795] Updated weights for policy 0, policy_version 349902 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:33,982][24592] Fps is (10 sec: 42574.7, 60 sec: 41228.3, 300 sec: 41292.3). Total num frames: 2866421760. Throughput: 0: 10443.9. Samples: 466594038. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:33,983][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:35,181][626795] Updated weights for policy 0, policy_version 349912 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:37,191][626795] Updated weights for policy 0, policy_version 349922 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:38,976][24592] Fps is (10 sec: 41780.1, 60 sec: 41779.4, 300 sec: 41321.0). Total num frames: 2866634752. Throughput: 0: 10416.4. Samples: 466656930. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:38,977][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:39,088][626795] Updated weights for policy 0, policy_version 349932 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:41,108][626795] Updated weights for policy 0, policy_version 349942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:43,041][626795] Updated weights for policy 0, policy_version 349952 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:43,975][24592] Fps is (10 sec: 41807.9, 60 sec: 41642.6, 300 sec: 41404.3). Total num frames: 2866839552. Throughput: 0: 10381.1. Samples: 466688070. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:43,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:45,841][626795] Updated weights for policy 0, policy_version 349962 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:47,790][626795] Updated weights for policy 0, policy_version 349972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:48,975][24592] Fps is (10 sec: 37683.9, 60 sec: 41096.5, 300 sec: 41293.2). Total num frames: 2867011584. Throughput: 0: 10128.8. Samples: 466741692. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:48,977][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:49,882][626795] Updated weights for policy 0, policy_version 349982 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:51,755][626795] Updated weights for policy 0, policy_version 349992 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:53,726][626795] Updated weights for policy 0, policy_version 350002 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:53,976][24592] Fps is (10 sec: 39320.0, 60 sec: 41232.8, 300 sec: 41321.0). Total num frames: 2867232768. Throughput: 0: 10149.2. Samples: 466805028. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:53,976][24592] Avg episode reward: [(0, '4.999')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:55,542][626795] Updated weights for policy 0, policy_version 350012 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:57,478][626795] Updated weights for policy 0, policy_version 350022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:58,976][24592] Fps is (10 sec: 43415.2, 60 sec: 41096.2, 300 sec: 41293.1). Total num frames: 2867445760. Throughput: 0: 10241.5. Samples: 466836774. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:43:58,977][24592] Avg episode reward: [(0, '4.762')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:43:59,359][626795] Updated weights for policy 0, policy_version 350032 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:01,198][626795] Updated weights for policy 0, policy_version 350042 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:03,153][626795] Updated weights for policy 0, policy_version 350052 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:03,975][24592] Fps is (10 sec: 42600.4, 60 sec: 41096.5, 300 sec: 41293.3). Total num frames: 2867658752. Throughput: 0: 10414.2. Samples: 466901910. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:03,976][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:05,171][626795] Updated weights for policy 0, policy_version 350062 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:06,973][626795] Updated weights for policy 0, policy_version 350072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:08,903][626795] Updated weights for policy 0, policy_version 350082 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:08,976][24592] Fps is (10 sec: 42600.2, 60 sec: 41233.2, 300 sec: 41293.2). Total num frames: 2867871744. Throughput: 0: 10409.2. Samples: 466965396. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:08,976][24592] Avg episode reward: [(0, '4.933')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:11,065][626795] Updated weights for policy 0, policy_version 350092 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:12,780][626795] Updated weights for policy 0, policy_version 350102 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:13,975][24592] Fps is (10 sec: 41778.6, 60 sec: 41779.2, 300 sec: 41293.2). Total num frames: 2868076544. Throughput: 0: 10387.8. Samples: 466996620. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:13,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:14,858][626795] Updated weights for policy 0, policy_version 350112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:16,655][626795] Updated weights for policy 0, policy_version 350122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:18,975][24592] Fps is (10 sec: 38502.7, 60 sec: 41233.4, 300 sec: 41294.1). Total num frames: 2868256768. Throughput: 0: 10308.8. Samples: 467057862. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:18,977][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:19,629][626795] Updated weights for policy 0, policy_version 350132 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:21,595][626795] Updated weights for policy 0, policy_version 350142 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:23,570][626795] Updated weights for policy 0, policy_version 350152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:23,975][24592] Fps is (10 sec: 38502.6, 60 sec: 41097.4, 300 sec: 41265.6). Total num frames: 2868461568. Throughput: 0: 10140.2. Samples: 467113236. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:23,977][24592] Avg episode reward: [(0, '4.959')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:25,442][626795] Updated weights for policy 0, policy_version 350162 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:27,398][626795] Updated weights for policy 0, policy_version 350172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:28,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40960.2, 300 sec: 41265.5). Total num frames: 2868674560. Throughput: 0: 10155.2. Samples: 467145054. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:28,978][24592] Avg episode reward: [(0, '4.752')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:29,355][626795] Updated weights for policy 0, policy_version 350182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:31,147][626795] Updated weights for policy 0, policy_version 350192 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:33,213][626795] Updated weights for policy 0, policy_version 350202 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:33,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41101.2, 300 sec: 41293.2). Total num frames: 2868887552. Throughput: 0: 10406.8. Samples: 467209998. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:33,976][24592] Avg episode reward: [(0, '4.598')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:35,015][626795] Updated weights for policy 0, policy_version 350212 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:37,021][626795] Updated weights for policy 0, policy_version 350222 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:38,975][24592] Fps is (10 sec: 41779.6, 60 sec: 40960.2, 300 sec: 41237.8). Total num frames: 2869092352. Throughput: 0: 10405.7. Samples: 467273280. Policy #0 lag: (min: 0.0, avg: 1.9, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:38,977][24592] Avg episode reward: [(0, '4.410')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:38,980][626795] Updated weights for policy 0, policy_version 350232 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:40,837][626795] Updated weights for policy 0, policy_version 350242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:42,720][626795] Updated weights for policy 0, policy_version 350252 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:43,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41096.5, 300 sec: 41237.7). Total num frames: 2869305344. Throughput: 0: 10390.1. Samples: 467304324. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:43,976][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:44,800][626795] Updated weights for policy 0, policy_version 350262 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:46,700][626795] Updated weights for policy 0, policy_version 350272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:48,518][626795] Updated weights for policy 0, policy_version 350282 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:48,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41779.2, 300 sec: 41376.6). Total num frames: 2869518336. Throughput: 0: 10359.6. Samples: 467368092. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:48,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:51,395][626795] Updated weights for policy 0, policy_version 350292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:53,299][626795] Updated weights for policy 0, policy_version 350302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:53,977][24592] Fps is (10 sec: 39317.1, 60 sec: 41096.0, 300 sec: 41293.1). Total num frames: 2869698560. Throughput: 0: 10172.0. Samples: 467423148. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:53,978][24592] Avg episode reward: [(0, '4.712')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:55,234][626795] Updated weights for policy 0, policy_version 350312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:57,087][626795] Updated weights for policy 0, policy_version 350322 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:44:58,973][626795] Updated weights for policy 0, policy_version 350332 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:58,975][24592] Fps is (10 sec: 40140.8, 60 sec: 41233.5, 300 sec: 41321.0). Total num frames: 2869919744. Throughput: 0: 10190.7. Samples: 467455200. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:44:58,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:01,019][626795] Updated weights for policy 0, policy_version 350342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:02,755][626795] Updated weights for policy 0, policy_version 350352 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:03,975][24592] Fps is (10 sec: 43422.6, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2870132736. Throughput: 0: 10280.7. Samples: 467520492. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:03,977][24592] Avg episode reward: [(0, '4.803')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000350358_2870132736.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:04,122][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000349145_2860195840.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:04,788][626795] Updated weights for policy 0, policy_version 350362 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:06,666][626795] Updated weights for policy 0, policy_version 350372 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:08,602][626795] Updated weights for policy 0, policy_version 350382 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:08,991][24592] Fps is (10 sec: 41713.0, 60 sec: 41085.8, 300 sec: 41291.0). Total num frames: 2870337536. Throughput: 0: 10449.7. Samples: 467583636. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:08,996][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:10,612][626795] Updated weights for policy 0, policy_version 350392 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:12,373][626795] Updated weights for policy 0, policy_version 350402 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41233.1, 300 sec: 41265.5). Total num frames: 2870550528. Throughput: 0: 10449.6. Samples: 467615286. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:13,978][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:14,399][626795] Updated weights for policy 0, policy_version 350412 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:16,239][626795] Updated weights for policy 0, policy_version 350422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:18,209][626795] Updated weights for policy 0, policy_version 350432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:18,981][24592] Fps is (10 sec: 42641.2, 60 sec: 41775.2, 300 sec: 41264.7). Total num frames: 2870763520. Throughput: 0: 10432.4. Samples: 467679516. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:18,982][24592] Avg episode reward: [(0, '4.416')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:20,172][626795] Updated weights for policy 0, policy_version 350442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:22,132][626795] Updated weights for policy 0, policy_version 350452 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:24,437][24592] Fps is (10 sec: 39151.6, 60 sec: 41324.5, 300 sec: 41284.2). Total num frames: 2870960128. Throughput: 0: 9630.7. Samples: 467711112. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:24,438][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:24,990][626795] Updated weights for policy 0, policy_version 350462 (0.0053)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:25,916][626772] Signal inference workers to stop experience collection... (6000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:25,917][626772] Signal inference workers to resume experience collection... (6000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:25,927][626795] InferenceWorker_p0-w0: stopping experience collection (6000 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:25,933][626795] InferenceWorker_p0-w0: resuming experience collection (6000 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:26,846][626795] Updated weights for policy 0, policy_version 350472 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:28,858][626795] Updated weights for policy 0, policy_version 350482 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:28,975][24592] Fps is (10 sec: 39344.3, 60 sec: 41369.6, 300 sec: 41321.0). Total num frames: 2871156736. Throughput: 0: 10241.3. Samples: 467765184. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:28,977][24592] Avg episode reward: [(0, '4.724')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:30,634][626795] Updated weights for policy 0, policy_version 350492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:32,577][626795] Updated weights for policy 0, policy_version 350502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:33,975][24592] Fps is (10 sec: 42943.3, 60 sec: 41369.5, 300 sec: 41321.1). Total num frames: 2871369728. Throughput: 0: 10249.7. Samples: 467829330. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:33,976][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:34,456][626795] Updated weights for policy 0, policy_version 350512 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:36,442][626795] Updated weights for policy 0, policy_version 350522 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:38,311][626795] Updated weights for policy 0, policy_version 350532 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:38,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41505.9, 300 sec: 41293.3). Total num frames: 2871582720. Throughput: 0: 10465.1. Samples: 467894070. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:38,977][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:40,265][626795] Updated weights for policy 0, policy_version 350542 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:42,188][626795] Updated weights for policy 0, policy_version 350552 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:43,975][24592] Fps is (10 sec: 42598.7, 60 sec: 41506.1, 300 sec: 41293.2). Total num frames: 2871795712. Throughput: 0: 10456.0. Samples: 467925720. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:43,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:44,159][626795] Updated weights for policy 0, policy_version 350562 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:45,947][626795] Updated weights for policy 0, policy_version 350572 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:47,908][626795] Updated weights for policy 0, policy_version 350582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:48,975][24592] Fps is (10 sec: 42599.5, 60 sec: 41506.1, 300 sec: 41321.0). Total num frames: 2872008704. Throughput: 0: 10437.2. Samples: 467990166. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:48,978][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:49,940][626795] Updated weights for policy 0, policy_version 350592 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:51,776][626795] Updated weights for policy 0, policy_version 350602 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:53,785][626795] Updated weights for policy 0, policy_version 350612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:53,976][24592] Fps is (10 sec: 42596.2, 60 sec: 42052.7, 300 sec: 41417.9). Total num frames: 2872221696. Throughput: 0: 10437.7. Samples: 468053172. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:53,978][24592] Avg episode reward: [(0, '4.835')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:55,754][626795] Updated weights for policy 0, policy_version 350622 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:45:58,491][626795] Updated weights for policy 0, policy_version 350632 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:58,975][24592] Fps is (10 sec: 38502.6, 60 sec: 41233.1, 300 sec: 41348.8). Total num frames: 2872393728. Throughput: 0: 10436.0. Samples: 468084906. Policy #0 lag: (min: 0.0, avg: 2.6, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:45:58,976][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:00,558][626795] Updated weights for policy 0, policy_version 350642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:02,437][626795] Updated weights for policy 0, policy_version 350652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:03,976][24592] Fps is (10 sec: 37684.3, 60 sec: 41096.4, 300 sec: 41348.8). Total num frames: 2872598528. Throughput: 0: 10199.0. Samples: 468138414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:03,978][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:04,285][626795] Updated weights for policy 0, policy_version 350662 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:06,341][626795] Updated weights for policy 0, policy_version 350672 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:08,179][626795] Updated weights for policy 0, policy_version 350682 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:08,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41380.4, 300 sec: 41376.5). Total num frames: 2872819712. Throughput: 0: 11011.0. Samples: 468201522. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:08,977][24592] Avg episode reward: [(0, '4.869')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:10,188][626795] Updated weights for policy 0, policy_version 350692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:12,141][626795] Updated weights for policy 0, policy_version 350702 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:13,975][24592] Fps is (10 sec: 42599.4, 60 sec: 41233.1, 300 sec: 41321.1). Total num frames: 2873024512. Throughput: 0: 10399.2. Samples: 468233148. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:13,978][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:14,029][626795] Updated weights for policy 0, policy_version 350712 (0.0034)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:15,927][626795] Updated weights for policy 0, policy_version 350722 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:17,788][626795] Updated weights for policy 0, policy_version 350732 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:18,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41237.1, 300 sec: 41321.0). Total num frames: 2873237504. Throughput: 0: 10406.6. Samples: 468297624. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:18,976][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:19,800][626795] Updated weights for policy 0, policy_version 350742 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:21,797][626795] Updated weights for policy 0, policy_version 350752 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:23,668][626795] Updated weights for policy 0, policy_version 350762 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:23,975][24592] Fps is (10 sec: 42598.1, 60 sec: 41828.1, 300 sec: 41348.8). Total num frames: 2873450496. Throughput: 0: 10385.5. Samples: 468361416. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:23,976][24592] Avg episode reward: [(0, '4.754')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:25,621][626795] Updated weights for policy 0, policy_version 350772 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:27,496][626795] Updated weights for policy 0, policy_version 350782 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:28,975][24592] Fps is (10 sec: 42597.9, 60 sec: 41779.2, 300 sec: 41459.8). Total num frames: 2873663488. Throughput: 0: 10386.1. Samples: 468393096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:28,977][24592] Avg episode reward: [(0, '4.834')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:29,421][626795] Updated weights for policy 0, policy_version 350792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:32,079][626795] Updated weights for policy 0, policy_version 350802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:33,971][626795] Updated weights for policy 0, policy_version 350812 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:33,976][24592] Fps is (10 sec: 40138.1, 60 sec: 41369.1, 300 sec: 41376.5). Total num frames: 2873851904. Throughput: 0: 10208.2. Samples: 468449544. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:33,982][24592] Avg episode reward: [(0, '4.923')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:35,936][626795] Updated weights for policy 0, policy_version 350822 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:37,807][626795] Updated weights for policy 0, policy_version 350832 (0.0034)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:38,976][24592] Fps is (10 sec: 39320.4, 60 sec: 41233.0, 300 sec: 41348.7). Total num frames: 2874056704. Throughput: 0: 10224.3. Samples: 468513264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:38,977][24592] Avg episode reward: [(0, '4.617')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:39,823][626795] Updated weights for policy 0, policy_version 350842 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:41,648][626795] Updated weights for policy 0, policy_version 350852 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:43,582][626795] Updated weights for policy 0, policy_version 350862 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:43,975][24592] Fps is (10 sec: 41782.2, 60 sec: 41233.0, 300 sec: 41321.0). Total num frames: 2874269696. Throughput: 0: 10216.1. Samples: 468544632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:43,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:45,557][626795] Updated weights for policy 0, policy_version 350872 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:47,477][626795] Updated weights for policy 0, policy_version 350882 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:48,975][24592] Fps is (10 sec: 42600.5, 60 sec: 41233.1, 300 sec: 41321.0). Total num frames: 2874482688. Throughput: 0: 10457.7. Samples: 468609006. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:48,976][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:49,363][626795] Updated weights for policy 0, policy_version 350892 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:51,338][626795] Updated weights for policy 0, policy_version 350902 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:53,291][626795] Updated weights for policy 0, policy_version 350912 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:53,975][24592] Fps is (10 sec: 41779.4, 60 sec: 41096.9, 300 sec: 41293.2). Total num frames: 2874687488. Throughput: 0: 10438.0. Samples: 468671232. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:53,978][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:55,350][626795] Updated weights for policy 0, policy_version 350922 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:57,236][626795] Updated weights for policy 0, policy_version 350932 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:58,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41915.7, 300 sec: 41348.8). Total num frames: 2874908672. Throughput: 0: 10449.9. Samples: 468703392. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:46:58,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:46:59,128][626795] Updated weights for policy 0, policy_version 350942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:01,109][626795] Updated weights for policy 0, policy_version 350952 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:03,962][626795] Updated weights for policy 0, policy_version 350962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:03,975][24592] Fps is (10 sec: 39321.3, 60 sec: 41369.7, 300 sec: 41348.8). Total num frames: 2875080704. Throughput: 0: 10426.7. Samples: 468766824. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:03,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000350962_2875080704.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:04,094][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000349755_2865192960.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:05,841][626795] Updated weights for policy 0, policy_version 350972 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:08,393][626795] Updated weights for policy 0, policy_version 350982 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:08,975][24592] Fps is (10 sec: 35225.6, 60 sec: 40687.1, 300 sec: 41237.7). Total num frames: 2875260928. Throughput: 0: 10050.2. Samples: 468813672. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:08,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:10,620][626795] Updated weights for policy 0, policy_version 350992 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:12,458][626795] Updated weights for policy 0, policy_version 351002 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:13,975][24592] Fps is (10 sec: 37683.6, 60 sec: 40550.4, 300 sec: 41182.2). Total num frames: 2875457536. Throughput: 0: 10021.8. Samples: 468844074. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:13,978][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:14,896][626795] Updated weights for policy 0, policy_version 351012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:16,844][626795] Updated weights for policy 0, policy_version 351022 (0.0026)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:18,975][24592] Fps is (10 sec: 38501.8, 60 sec: 40140.7, 300 sec: 41071.1). Total num frames: 2875645952. Throughput: 0: 10056.9. Samples: 468902100. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:18,976][24592] Avg episode reward: [(0, '4.554')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:19,024][626795] Updated weights for policy 0, policy_version 351032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:21,069][626795] Updated weights for policy 0, policy_version 351042 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:22,999][626795] Updated weights for policy 0, policy_version 351052 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:23,975][24592] Fps is (10 sec: 39321.6, 60 sec: 40004.4, 300 sec: 41043.3). Total num frames: 2875850752. Throughput: 0: 9950.5. Samples: 468961032. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:23,976][24592] Avg episode reward: [(0, '4.946')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:25,242][626795] Updated weights for policy 0, policy_version 351062 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:27,384][626795] Updated weights for policy 0, policy_version 351072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:28,976][24592] Fps is (10 sec: 39320.5, 60 sec: 39594.5, 300 sec: 40987.7). Total num frames: 2876039168. Throughput: 0: 9889.0. Samples: 468989640. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:28,977][24592] Avg episode reward: [(0, '4.677')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:29,444][626795] Updated weights for policy 0, policy_version 351082 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:31,498][626795] Updated weights for policy 0, policy_version 351092 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:33,423][626795] Updated weights for policy 0, policy_version 351102 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:33,975][24592] Fps is (10 sec: 39321.4, 60 sec: 39868.2, 300 sec: 41071.1). Total num frames: 2876243968. Throughput: 0: 9783.4. Samples: 469049262. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:33,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:35,508][626795] Updated weights for policy 0, policy_version 351112 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:38,489][626795] Updated weights for policy 0, policy_version 351122 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:38,976][24592] Fps is (10 sec: 36863.6, 60 sec: 39185.0, 300 sec: 40904.4). Total num frames: 2876407808. Throughput: 0: 9537.4. Samples: 469100418. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:38,980][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:40,531][626795] Updated weights for policy 0, policy_version 351132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:42,582][626795] Updated weights for policy 0, policy_version 351142 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:43,975][24592] Fps is (10 sec: 36044.9, 60 sec: 38912.0, 300 sec: 40876.7). Total num frames: 2876604416. Throughput: 0: 9486.3. Samples: 469130274. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:43,976][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:44,670][626795] Updated weights for policy 0, policy_version 351152 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:46,803][626795] Updated weights for policy 0, policy_version 351162 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:48,780][626795] Updated weights for policy 0, policy_version 351172 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:48,975][24592] Fps is (10 sec: 40142.9, 60 sec: 38775.4, 300 sec: 40848.9). Total num frames: 2876809216. Throughput: 0: 9376.1. Samples: 469188750. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:48,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:50,768][626795] Updated weights for policy 0, policy_version 351182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:52,842][626795] Updated weights for policy 0, policy_version 351192 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:53,977][24592] Fps is (10 sec: 40951.8, 60 sec: 38774.2, 300 sec: 40793.1). Total num frames: 2877014016. Throughput: 0: 9697.4. Samples: 469250076. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:53,978][24592] Avg episode reward: [(0, '4.591')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:54,832][626795] Updated weights for policy 0, policy_version 351202 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:56,874][626795] Updated weights for policy 0, policy_version 351212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:58,975][24592] Fps is (10 sec: 39321.4, 60 sec: 38229.3, 300 sec: 40710.1). Total num frames: 2877202432. Throughput: 0: 9698.1. Samples: 469280490. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:47:58,978][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:47:59,021][626795] Updated weights for policy 0, policy_version 351222 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:01,129][626795] Updated weights for policy 0, policy_version 351232 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:03,125][626795] Updated weights for policy 0, policy_version 351242 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:03,981][24592] Fps is (10 sec: 39306.3, 60 sec: 38771.7, 300 sec: 40709.3). Total num frames: 2877407232. Throughput: 0: 9722.8. Samples: 469339680. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:03,982][24592] Avg episode reward: [(0, '4.563')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:05,267][626795] Updated weights for policy 0, policy_version 351252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:07,457][626795] Updated weights for policy 0, policy_version 351262 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:09,723][24592] Fps is (10 sec: 36587.7, 60 sec: 38433.3, 300 sec: 40662.6). Total num frames: 2877595648. Throughput: 0: 9554.8. Samples: 469398138. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:09,724][24592] Avg episode reward: [(0, '4.452')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:10,235][626795] Updated weights for policy 0, policy_version 351272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:12,250][626795] Updated weights for policy 0, policy_version 351282 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:13,975][24592] Fps is (10 sec: 36065.9, 60 sec: 38502.4, 300 sec: 40626.8). Total num frames: 2877767680. Throughput: 0: 9567.3. Samples: 469420164. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:13,978][24592] Avg episode reward: [(0, '4.945')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:14,258][626795] Updated weights for policy 0, policy_version 351292 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:16,217][626795] Updated weights for policy 0, policy_version 351302 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:18,338][626795] Updated weights for policy 0, policy_version 351312 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:18,976][24592] Fps is (10 sec: 40726.3, 60 sec: 38775.5, 300 sec: 40599.2). Total num frames: 2877972480. Throughput: 0: 9612.8. Samples: 469481838. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:18,979][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:20,306][626795] Updated weights for policy 0, policy_version 351322 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:22,193][626795] Updated weights for policy 0, policy_version 351332 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:23,975][24592] Fps is (10 sec: 41779.0, 60 sec: 38911.9, 300 sec: 40571.3). Total num frames: 2878185472. Throughput: 0: 9858.6. Samples: 469544052. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:23,976][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:24,223][626795] Updated weights for policy 0, policy_version 351342 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:26,099][626795] Updated weights for policy 0, policy_version 351352 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:27,985][626795] Updated weights for policy 0, policy_version 351362 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:28,976][24592] Fps is (10 sec: 41777.9, 60 sec: 39185.1, 300 sec: 40572.1). Total num frames: 2878390272. Throughput: 0: 9887.4. Samples: 469575210. Policy #0 lag: (min: 1.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:28,978][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:30,183][626795] Updated weights for policy 0, policy_version 351372 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:32,163][626795] Updated weights for policy 0, policy_version 351382 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:33,975][24592] Fps is (10 sec: 40960.2, 60 sec: 39185.1, 300 sec: 40543.5). Total num frames: 2878595072. Throughput: 0: 9941.1. Samples: 469636098. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:33,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:34,121][626795] Updated weights for policy 0, policy_version 351392 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:36,179][626795] Updated weights for policy 0, policy_version 351402 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:38,281][626795] Updated weights for policy 0, policy_version 351412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:38,975][24592] Fps is (10 sec: 40142.3, 60 sec: 39731.5, 300 sec: 40515.7). Total num frames: 2878791680. Throughput: 0: 9929.8. Samples: 469696896. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:38,976][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:40,159][626795] Updated weights for policy 0, policy_version 351422 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:42,919][626795] Updated weights for policy 0, policy_version 351432 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:43,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39458.1, 300 sec: 40543.5). Total num frames: 2878971904. Throughput: 0: 9946.5. Samples: 469728084. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:43,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:44,891][626795] Updated weights for policy 0, policy_version 351442 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:46,895][626795] Updated weights for policy 0, policy_version 351452 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:48,671][626795] Updated weights for policy 0, policy_version 351462 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:48,976][24592] Fps is (10 sec: 38501.8, 60 sec: 39458.0, 300 sec: 40488.0). Total num frames: 2879176704. Throughput: 0: 9860.1. Samples: 469783326. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:48,977][24592] Avg episode reward: [(0, '4.566')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:50,706][626795] Updated weights for policy 0, policy_version 351472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:52,545][626795] Updated weights for policy 0, policy_version 351482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:53,976][24592] Fps is (10 sec: 42596.8, 60 sec: 39732.3, 300 sec: 40515.7). Total num frames: 2879397888. Throughput: 0: 10157.5. Samples: 469847640. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:53,977][24592] Avg episode reward: [(0, '4.282')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:54,580][626795] Updated weights for policy 0, policy_version 351492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:56,803][626795] Updated weights for policy 0, policy_version 351502 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:48:58,728][626795] Updated weights for policy 0, policy_version 351512 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:58,975][24592] Fps is (10 sec: 41780.0, 60 sec: 39867.8, 300 sec: 40460.1). Total num frames: 2879594496. Throughput: 0: 10111.5. Samples: 469875180. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:48:58,976][24592] Avg episode reward: [(0, '4.748')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:00,773][626795] Updated weights for policy 0, policy_version 351522 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:02,654][626795] Updated weights for policy 0, policy_version 351532 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:03,975][24592] Fps is (10 sec: 40142.5, 60 sec: 39871.6, 300 sec: 40432.4). Total num frames: 2879799296. Throughput: 0: 10139.1. Samples: 469938096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:03,979][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:03,988][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000351538_2879799296.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:04,182][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000350358_2870132736.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:04,770][626795] Updated weights for policy 0, policy_version 351542 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:06,756][626795] Updated weights for policy 0, policy_version 351552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:08,589][626795] Updated weights for policy 0, policy_version 351562 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:08,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40647.1, 300 sec: 40432.4). Total num frames: 2880004096. Throughput: 0: 10132.0. Samples: 469999992. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:08,977][24592] Avg episode reward: [(0, '4.546')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:10,496][626795] Updated weights for policy 0, policy_version 351572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:12,534][626795] Updated weights for policy 0, policy_version 351582 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:13,976][24592] Fps is (10 sec: 41777.4, 60 sec: 40823.2, 300 sec: 40543.4). Total num frames: 2880217088. Throughput: 0: 10133.2. Samples: 470031204. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:13,978][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:14,469][626795] Updated weights for policy 0, policy_version 351592 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:17,139][626795] Updated weights for policy 0, policy_version 351602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:18,975][24592] Fps is (10 sec: 39321.4, 60 sec: 40413.9, 300 sec: 40460.2). Total num frames: 2880397312. Throughput: 0: 10010.7. Samples: 470086578. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:18,976][24592] Avg episode reward: [(0, '4.718')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:19,171][626795] Updated weights for policy 0, policy_version 351612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:21,009][626795] Updated weights for policy 0, policy_version 351622 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:22,885][626795] Updated weights for policy 0, policy_version 351632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:23,976][24592] Fps is (10 sec: 39321.3, 60 sec: 40413.6, 300 sec: 40460.1). Total num frames: 2880610304. Throughput: 0: 10072.0. Samples: 470150142. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:23,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:24,967][626795] Updated weights for policy 0, policy_version 351642 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:26,808][626795] Updated weights for policy 0, policy_version 351652 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:28,750][626795] Updated weights for policy 0, policy_version 351662 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:28,977][24592] Fps is (10 sec: 42591.8, 60 sec: 40549.6, 300 sec: 40459.9). Total num frames: 2880823296. Throughput: 0: 10088.2. Samples: 470182068. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:28,978][24592] Avg episode reward: [(0, '4.402')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:30,751][626795] Updated weights for policy 0, policy_version 351672 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:32,566][626795] Updated weights for policy 0, policy_version 351682 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:33,975][24592] Fps is (10 sec: 42600.4, 60 sec: 40686.9, 300 sec: 40487.9). Total num frames: 2881036288. Throughput: 0: 10284.6. Samples: 470246130. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:33,976][24592] Avg episode reward: [(0, '4.741')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:34,582][626795] Updated weights for policy 0, policy_version 351692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:36,517][626795] Updated weights for policy 0, policy_version 351702 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:38,410][626795] Updated weights for policy 0, policy_version 351712 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:38,975][24592] Fps is (10 sec: 41785.7, 60 sec: 40823.5, 300 sec: 40460.2). Total num frames: 2881241088. Throughput: 0: 10257.8. Samples: 470309238. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:38,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:40,329][626795] Updated weights for policy 0, policy_version 351722 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:42,319][626795] Updated weights for policy 0, policy_version 351732 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:43,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41506.1, 300 sec: 40487.9). Total num frames: 2881462272. Throughput: 0: 10356.0. Samples: 470341200. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:43,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:44,277][626795] Updated weights for policy 0, policy_version 351742 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:46,209][626795] Updated weights for policy 0, policy_version 351752 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:48,975][24592] Fps is (10 sec: 38501.9, 60 sec: 40823.5, 300 sec: 40432.5). Total num frames: 2881626112. Throughput: 0: 10336.2. Samples: 470403228. Policy #0 lag: (min: 0.0, avg: 2.5, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:48,977][24592] Avg episode reward: [(0, '4.821')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:49,055][626795] Updated weights for policy 0, policy_version 351762 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:50,989][626795] Updated weights for policy 0, policy_version 351772 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:52,858][626795] Updated weights for policy 0, policy_version 351782 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:53,976][24592] Fps is (10 sec: 37681.4, 60 sec: 40686.9, 300 sec: 40404.5). Total num frames: 2881839104. Throughput: 0: 10172.5. Samples: 470457762. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:53,977][24592] Avg episode reward: [(0, '4.397')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:54,866][626795] Updated weights for policy 0, policy_version 351792 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:56,850][626795] Updated weights for policy 0, policy_version 351802 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:49:58,695][626795] Updated weights for policy 0, policy_version 351812 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:58,975][24592] Fps is (10 sec: 41779.4, 60 sec: 40823.4, 300 sec: 40376.8). Total num frames: 2882043904. Throughput: 0: 10176.2. Samples: 470489130. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:49:58,979][24592] Avg episode reward: [(0, '4.659')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:00,786][626795] Updated weights for policy 0, policy_version 351822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:02,700][626795] Updated weights for policy 0, policy_version 351832 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:03,975][24592] Fps is (10 sec: 41780.7, 60 sec: 40959.9, 300 sec: 40406.8). Total num frames: 2882256896. Throughput: 0: 10346.1. Samples: 470552154. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:03,977][24592] Avg episode reward: [(0, '4.785')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:04,736][626795] Updated weights for policy 0, policy_version 351842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:06,578][626795] Updated weights for policy 0, policy_version 351852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:08,529][626795] Updated weights for policy 0, policy_version 351862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:08,975][24592] Fps is (10 sec: 42598.2, 60 sec: 41096.4, 300 sec: 40404.6). Total num frames: 2882469888. Throughput: 0: 10344.0. Samples: 470615616. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:08,977][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:10,459][626795] Updated weights for policy 0, policy_version 351872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:12,407][626795] Updated weights for policy 0, policy_version 351882 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:13,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41096.8, 300 sec: 40405.4). Total num frames: 2882682880. Throughput: 0: 10337.3. Samples: 470647230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:13,977][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:14,401][626795] Updated weights for policy 0, policy_version 351892 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:16,261][626795] Updated weights for policy 0, policy_version 351902 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:18,421][626795] Updated weights for policy 0, policy_version 351912 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:18,975][24592] Fps is (10 sec: 40960.2, 60 sec: 41369.6, 300 sec: 40468.0). Total num frames: 2882879488. Throughput: 0: 10272.5. Samples: 470708394. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:18,977][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:20,546][626795] Updated weights for policy 0, policy_version 351922 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:23,393][626795] Updated weights for policy 0, policy_version 351932 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:23,975][24592] Fps is (10 sec: 36044.6, 60 sec: 40550.7, 300 sec: 40293.5). Total num frames: 2883043328. Throughput: 0: 10024.4. Samples: 470760336. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:23,978][24592] Avg episode reward: [(0, '4.448')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:25,477][626795] Updated weights for policy 0, policy_version 351942 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:27,442][626795] Updated weights for policy 0, policy_version 351952 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:28,975][24592] Fps is (10 sec: 36864.1, 60 sec: 40414.9, 300 sec: 40265.8). Total num frames: 2883248128. Throughput: 0: 9979.5. Samples: 470790276. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:28,977][24592] Avg episode reward: [(0, '5.045')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:29,588][626795] Updated weights for policy 0, policy_version 351962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:31,758][626795] Updated weights for policy 0, policy_version 351972 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:33,607][626795] Updated weights for policy 0, policy_version 351982 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:33,975][24592] Fps is (10 sec: 40141.1, 60 sec: 40140.8, 300 sec: 40210.3). Total num frames: 2883444736. Throughput: 0: 9903.5. Samples: 470848884. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:33,976][24592] Avg episode reward: [(0, '4.631')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:35,735][626795] Updated weights for policy 0, policy_version 351992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:37,718][626795] Updated weights for policy 0, policy_version 352002 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:38,975][24592] Fps is (10 sec: 39321.4, 60 sec: 40004.2, 300 sec: 40154.7). Total num frames: 2883641344. Throughput: 0: 10034.4. Samples: 470909304. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:38,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:39,997][626795] Updated weights for policy 0, policy_version 352012 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:42,280][626795] Updated weights for policy 0, policy_version 352022 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:43,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39321.6, 300 sec: 40043.6). Total num frames: 2883821568. Throughput: 0: 9939.5. Samples: 470936406. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:43,977][24592] Avg episode reward: [(0, '4.597')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:44,535][626795] Updated weights for policy 0, policy_version 352032 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:46,616][626795] Updated weights for policy 0, policy_version 352042 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:48,898][626795] Updated weights for policy 0, policy_version 352052 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:48,977][24592] Fps is (10 sec: 36861.8, 60 sec: 39730.8, 300 sec: 39960.3). Total num frames: 2884009984. Throughput: 0: 9751.1. Samples: 470990958. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:48,979][24592] Avg episode reward: [(0, '4.727')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:51,044][626795] Updated weights for policy 0, policy_version 352062 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:52,771][626795] Updated weights for policy 0, policy_version 352072 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:53,975][24592] Fps is (10 sec: 38502.3, 60 sec: 39458.4, 300 sec: 40043.6). Total num frames: 2884206592. Throughput: 0: 9666.9. Samples: 471050628. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:53,977][24592] Avg episode reward: [(0, '4.522')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:55,754][626795] Updated weights for policy 0, policy_version 352082 (0.0827)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:57,821][626795] Updated weights for policy 0, policy_version 352092 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:58,975][24592] Fps is (10 sec: 37686.0, 60 sec: 39048.6, 300 sec: 39960.3). Total num frames: 2884386816. Throughput: 0: 9480.3. Samples: 471073842. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:50:58,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:50:59,744][626795] Updated weights for policy 0, policy_version 352102 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:01,591][626795] Updated weights for policy 0, policy_version 352112 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:03,521][626795] Updated weights for policy 0, policy_version 352122 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:03,975][24592] Fps is (10 sec: 39321.6, 60 sec: 39048.6, 300 sec: 39932.5). Total num frames: 2884599808. Throughput: 0: 9512.4. Samples: 471136452. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:03,977][24592] Avg episode reward: [(0, '4.720')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000352124_2884599808.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:04,106][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000350962_2875080704.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:05,689][626795] Updated weights for policy 0, policy_version 352132 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:08,032][626795] Updated weights for policy 0, policy_version 352142 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:08,976][24592] Fps is (10 sec: 40138.9, 60 sec: 38638.7, 300 sec: 39876.9). Total num frames: 2884788224. Throughput: 0: 9628.5. Samples: 471193620. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:08,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:10,250][626795] Updated weights for policy 0, policy_version 352152 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:11,982][626795] Updated weights for policy 0, policy_version 352162 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:13,975][24592] Fps is (10 sec: 38502.1, 60 sec: 38365.8, 300 sec: 39821.4). Total num frames: 2884984832. Throughput: 0: 9637.6. Samples: 471223968. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:13,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:14,068][626795] Updated weights for policy 0, policy_version 352172 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:16,096][626795] Updated weights for policy 0, policy_version 352182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:18,167][626795] Updated weights for policy 0, policy_version 352192 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:18,976][24592] Fps is (10 sec: 40141.6, 60 sec: 38502.3, 300 sec: 39793.7). Total num frames: 2885189632. Throughput: 0: 9693.4. Samples: 471285090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:18,978][24592] Avg episode reward: [(0, '4.648')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:20,141][626795] Updated weights for policy 0, policy_version 352202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:22,151][626795] Updated weights for policy 0, policy_version 352212 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:23,975][24592] Fps is (10 sec: 40960.4, 60 sec: 39185.1, 300 sec: 39765.9). Total num frames: 2885394432. Throughput: 0: 9721.6. Samples: 471346776. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:23,977][24592] Avg episode reward: [(0, '4.579')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:24,049][626795] Updated weights for policy 0, policy_version 352222 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:26,113][626795] Updated weights for policy 0, policy_version 352232 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:28,969][626795] Updated weights for policy 0, policy_version 352242 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:28,975][24592] Fps is (10 sec: 37684.1, 60 sec: 38639.0, 300 sec: 39710.5). Total num frames: 2885566464. Throughput: 0: 9794.7. Samples: 471377166. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:28,977][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:30,937][626795] Updated weights for policy 0, policy_version 352252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:32,708][626795] Updated weights for policy 0, policy_version 352262 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:33,975][24592] Fps is (10 sec: 37683.2, 60 sec: 38775.5, 300 sec: 39710.4). Total num frames: 2885771264. Throughput: 0: 9798.1. Samples: 471431868. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:33,977][24592] Avg episode reward: [(0, '5.056')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:34,781][626795] Updated weights for policy 0, policy_version 352272 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:36,697][626795] Updated weights for policy 0, policy_version 352282 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:38,509][626795] Updated weights for policy 0, policy_version 352292 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:38,975][24592] Fps is (10 sec: 42598.1, 60 sec: 39185.1, 300 sec: 39738.1). Total num frames: 2885992448. Throughput: 0: 9884.1. Samples: 471495414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:38,976][24592] Avg episode reward: [(0, '4.876')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:40,533][626795] Updated weights for policy 0, policy_version 352302 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:42,451][626795] Updated weights for policy 0, policy_version 352312 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:43,976][24592] Fps is (10 sec: 43414.7, 60 sec: 39730.8, 300 sec: 39738.0). Total num frames: 2886205440. Throughput: 0: 10072.1. Samples: 471527094. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:43,977][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:44,439][626795] Updated weights for policy 0, policy_version 352322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:46,348][626795] Updated weights for policy 0, policy_version 352332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:48,204][626795] Updated weights for policy 0, policy_version 352342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:48,975][24592] Fps is (10 sec: 42598.4, 60 sec: 40141.2, 300 sec: 39765.9). Total num frames: 2886418432. Throughput: 0: 10103.2. Samples: 471591096. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:48,977][24592] Avg episode reward: [(0, '4.958')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:50,216][626795] Updated weights for policy 0, policy_version 352352 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:52,028][626795] Updated weights for policy 0, policy_version 352362 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:53,975][24592] Fps is (10 sec: 42601.3, 60 sec: 40413.9, 300 sec: 39738.1). Total num frames: 2886631424. Throughput: 0: 10261.8. Samples: 471655398. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:53,977][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:53,985][626795] Updated weights for policy 0, policy_version 352372 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:56,123][626795] Updated weights for policy 0, policy_version 352382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:58,074][626795] Updated weights for policy 0, policy_version 352392 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:58,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40686.9, 300 sec: 39821.5). Total num frames: 2886828032. Throughput: 0: 10238.3. Samples: 471684690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:51:58,977][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:51:59,946][626795] Updated weights for policy 0, policy_version 352402 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:02,752][626795] Updated weights for policy 0, policy_version 352412 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:03,975][24592] Fps is (10 sec: 37682.9, 60 sec: 40140.8, 300 sec: 39821.4). Total num frames: 2887008256. Throughput: 0: 10092.6. Samples: 471739254. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:03,977][24592] Avg episode reward: [(0, '4.585')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:04,736][626795] Updated weights for policy 0, policy_version 352422 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:06,643][626795] Updated weights for policy 0, policy_version 352432 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:08,549][626795] Updated weights for policy 0, policy_version 352442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:08,975][24592] Fps is (10 sec: 38502.7, 60 sec: 40414.2, 300 sec: 39849.2). Total num frames: 2887213056. Throughput: 0: 10149.2. Samples: 471803490. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:08,976][24592] Avg episode reward: [(0, '4.918')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:10,601][626795] Updated weights for policy 0, policy_version 352452 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:12,398][626795] Updated weights for policy 0, policy_version 352462 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:13,975][24592] Fps is (10 sec: 42598.7, 60 sec: 40823.5, 300 sec: 39960.3). Total num frames: 2887434240. Throughput: 0: 10165.3. Samples: 471834606. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:13,977][24592] Avg episode reward: [(0, '4.573')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:14,416][626795] Updated weights for policy 0, policy_version 352472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:16,248][626795] Updated weights for policy 0, policy_version 352482 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:18,142][626795] Updated weights for policy 0, policy_version 352492 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:18,975][24592] Fps is (10 sec: 43417.3, 60 sec: 40960.1, 300 sec: 39988.1). Total num frames: 2887647232. Throughput: 0: 10371.5. Samples: 471898584. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:18,977][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:20,103][626795] Updated weights for policy 0, policy_version 352502 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:22,027][626795] Updated weights for policy 0, policy_version 352512 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:23,921][626795] Updated weights for policy 0, policy_version 352522 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:23,976][24592] Fps is (10 sec: 42597.5, 60 sec: 41096.4, 300 sec: 40071.4). Total num frames: 2887860224. Throughput: 0: 10381.4. Samples: 471962580. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:23,977][24592] Avg episode reward: [(0, '4.602')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:25,991][626795] Updated weights for policy 0, policy_version 352532 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:28,788][626795] Updated weights for policy 0, policy_version 352542 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:28,975][24592] Fps is (10 sec: 37683.5, 60 sec: 40960.0, 300 sec: 39932.5). Total num frames: 2888024064. Throughput: 0: 10338.2. Samples: 471992304. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:28,976][24592] Avg episode reward: [(0, '4.365')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:31,017][626795] Updated weights for policy 0, policy_version 352552 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:33,068][626795] Updated weights for policy 0, policy_version 352562 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:34,230][24592] Fps is (10 sec: 32752.5, 60 sec: 40242.8, 300 sec: 39925.9). Total num frames: 2888196096. Throughput: 0: 9988.3. Samples: 472043118. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:34,231][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:36,171][626795] Updated weights for policy 0, policy_version 352572 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:38,425][626795] Updated weights for policy 0, policy_version 352582 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:38,986][24592] Fps is (10 sec: 34368.9, 60 sec: 39587.5, 300 sec: 39875.5). Total num frames: 2888368128. Throughput: 0: 9683.8. Samples: 472091274. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:38,987][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:40,445][626795] Updated weights for policy 0, policy_version 352592 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:43,063][626795] Updated weights for policy 0, policy_version 352602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:43,975][24592] Fps is (10 sec: 36147.5, 60 sec: 39048.9, 300 sec: 39793.7). Total num frames: 2888548352. Throughput: 0: 9629.1. Samples: 472117998. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:43,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:45,322][626795] Updated weights for policy 0, policy_version 352612 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:47,644][626795] Updated weights for policy 0, policy_version 352622 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:48,976][24592] Fps is (10 sec: 36083.5, 60 sec: 38502.3, 300 sec: 39710.6). Total num frames: 2888728576. Throughput: 0: 9582.4. Samples: 472170462. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:48,980][24592] Avg episode reward: [(0, '4.847')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:49,667][626795] Updated weights for policy 0, policy_version 352632 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:51,658][626795] Updated weights for policy 0, policy_version 352642 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:53,589][626795] Updated weights for policy 0, policy_version 352652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:53,975][24592] Fps is (10 sec: 38502.1, 60 sec: 38365.8, 300 sec: 39765.9). Total num frames: 2888933376. Throughput: 0: 9525.4. Samples: 472232136. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:53,977][24592] Avg episode reward: [(0, '4.742')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:55,763][626795] Updated weights for policy 0, policy_version 352662 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:56,597][626772] Signal inference workers to stop experience collection... (6050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:56,598][626772] Signal inference workers to resume experience collection... (6050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:56,616][626795] InferenceWorker_p0-w0: stopping experience collection (6050 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:56,623][626795] InferenceWorker_p0-w0: resuming experience collection (6050 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:57,768][626795] Updated weights for policy 0, policy_version 352672 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:58,976][24592] Fps is (10 sec: 40959.5, 60 sec: 38502.3, 300 sec: 39766.7). Total num frames: 2889138176. Throughput: 0: 9492.5. Samples: 472261770. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:52:58,976][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:52:59,721][626795] Updated weights for policy 0, policy_version 352682 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:01,636][626795] Updated weights for policy 0, policy_version 352692 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:03,588][626795] Updated weights for policy 0, policy_version 352702 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:03,975][24592] Fps is (10 sec: 41779.6, 60 sec: 39048.6, 300 sec: 39950.4). Total num frames: 2889351168. Throughput: 0: 9463.6. Samples: 472324446. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:03,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:03,978][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000352704_2889351168.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:04,110][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000351538_2879799296.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:05,633][626795] Updated weights for policy 0, policy_version 352712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:08,916][626795] Updated weights for policy 0, policy_version 352722 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:08,976][24592] Fps is (10 sec: 36042.0, 60 sec: 38092.1, 300 sec: 39765.8). Total num frames: 2889498624. Throughput: 0: 9117.6. Samples: 472372878. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:08,978][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:10,948][626795] Updated weights for policy 0, policy_version 352732 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:12,929][626795] Updated weights for policy 0, policy_version 352742 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:13,976][24592] Fps is (10 sec: 35223.8, 60 sec: 37819.4, 300 sec: 39765.9). Total num frames: 2889703424. Throughput: 0: 9131.0. Samples: 472403202. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:13,977][24592] Avg episode reward: [(0, '4.736')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:14,973][626795] Updated weights for policy 0, policy_version 352752 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:16,915][626795] Updated weights for policy 0, policy_version 352762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:18,817][626795] Updated weights for policy 0, policy_version 352772 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:18,975][24592] Fps is (10 sec: 41783.6, 60 sec: 37819.8, 300 sec: 39765.9). Total num frames: 2889916416. Throughput: 0: 9434.0. Samples: 472465242. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:18,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:20,806][626795] Updated weights for policy 0, policy_version 352782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:22,674][626795] Updated weights for policy 0, policy_version 352792 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:23,975][24592] Fps is (10 sec: 41781.3, 60 sec: 37683.3, 300 sec: 39766.0). Total num frames: 2890121216. Throughput: 0: 9727.3. Samples: 472528896. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:23,976][24592] Avg episode reward: [(0, '4.860')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:24,867][626795] Updated weights for policy 0, policy_version 352802 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:27,601][626795] Updated weights for policy 0, policy_version 352812 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:28,976][24592] Fps is (10 sec: 36863.1, 60 sec: 37683.0, 300 sec: 39627.0). Total num frames: 2890285056. Throughput: 0: 9682.4. Samples: 472553706. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:28,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:29,713][626795] Updated weights for policy 0, policy_version 352822 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:31,699][626795] Updated weights for policy 0, policy_version 352832 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:33,975][24592] Fps is (10 sec: 35225.2, 60 sec: 38118.2, 300 sec: 39599.3). Total num frames: 2890473472. Throughput: 0: 9718.1. Samples: 472607778. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:33,976][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:34,005][626795] Updated weights for policy 0, policy_version 352842 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:36,114][626795] Updated weights for policy 0, policy_version 352852 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:38,113][626795] Updated weights for policy 0, policy_version 352862 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:38,975][24592] Fps is (10 sec: 38503.2, 60 sec: 38372.8, 300 sec: 39654.8). Total num frames: 2890670080. Throughput: 0: 9649.9. Samples: 472666380. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:38,981][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:41,987][626795] Updated weights for policy 0, policy_version 352872 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:43,975][24592] Fps is (10 sec: 29491.4, 60 sec: 37000.5, 300 sec: 39293.8). Total num frames: 2890768384. Throughput: 0: 9276.4. Samples: 472679208. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:43,976][24592] Avg episode reward: [(0, '4.788')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:46,201][626795] Updated weights for policy 0, policy_version 352882 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:48,975][24592] Fps is (10 sec: 19660.8, 60 sec: 35635.3, 300 sec: 38877.3). Total num frames: 2890866688. Throughput: 0: 8542.4. Samples: 472708854. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:48,976][24592] Avg episode reward: [(0, '4.702')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:49,545][626795] Updated weights for policy 0, policy_version 352892 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:51,628][626795] Updated weights for policy 0, policy_version 352902 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:53,975][24592] Fps is (10 sec: 27852.9, 60 sec: 35225.6, 300 sec: 38821.7). Total num frames: 2891046912. Throughput: 0: 8610.9. Samples: 472760358. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:53,978][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:54,105][626795] Updated weights for policy 0, policy_version 352912 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:56,215][626795] Updated weights for policy 0, policy_version 352922 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:53:58,328][626795] Updated weights for policy 0, policy_version 352932 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:58,976][24592] Fps is (10 sec: 37682.8, 60 sec: 35089.1, 300 sec: 38794.0). Total num frames: 2891243520. Throughput: 0: 8558.2. Samples: 472788318. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:53:58,977][24592] Avg episode reward: [(0, '4.593')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:00,437][626795] Updated weights for policy 0, policy_version 352942 (0.0035)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:02,764][626795] Updated weights for policy 0, policy_version 352952 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:03,975][24592] Fps is (10 sec: 37683.1, 60 sec: 34542.9, 300 sec: 38710.7). Total num frames: 2891423744. Throughput: 0: 8462.3. Samples: 472846044. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:03,977][24592] Avg episode reward: [(0, '4.503')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:04,928][626795] Updated weights for policy 0, policy_version 352962 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:06,986][626795] Updated weights for policy 0, policy_version 352972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:08,976][24592] Fps is (10 sec: 37680.9, 60 sec: 35362.3, 300 sec: 38655.1). Total num frames: 2891620352. Throughput: 0: 8335.1. Samples: 472903980. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:08,978][24592] Avg episode reward: [(0, '4.667')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:09,155][626795] Updated weights for policy 0, policy_version 352982 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:11,470][626795] Updated weights for policy 0, policy_version 352992 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:13,975][24592] Fps is (10 sec: 34406.6, 60 sec: 34406.7, 300 sec: 38544.1). Total num frames: 2891767808. Throughput: 0: 8379.0. Samples: 472930758. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:13,979][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:14,384][626795] Updated weights for policy 0, policy_version 353002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:16,577][626795] Updated weights for policy 0, policy_version 353012 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:18,863][626795] Updated weights for policy 0, policy_version 353022 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:18,975][24592] Fps is (10 sec: 33590.0, 60 sec: 33996.8, 300 sec: 38460.8). Total num frames: 2891956224. Throughput: 0: 8257.9. Samples: 472979382. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:18,977][24592] Avg episode reward: [(0, '5.050')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:20,932][626795] Updated weights for policy 0, policy_version 353032 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:22,917][626795] Updated weights for policy 0, policy_version 353042 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:23,975][24592] Fps is (10 sec: 39321.5, 60 sec: 33996.8, 300 sec: 38433.2). Total num frames: 2892161024. Throughput: 0: 8271.7. Samples: 473038608. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:23,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:25,010][626795] Updated weights for policy 0, policy_version 353052 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:26,866][626795] Updated weights for policy 0, policy_version 353062 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:28,875][626795] Updated weights for policy 0, policy_version 353072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:28,976][24592] Fps is (10 sec: 40959.6, 60 sec: 34679.6, 300 sec: 38405.2). Total num frames: 2892365824. Throughput: 0: 8664.9. Samples: 473069130. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:28,977][24592] Avg episode reward: [(0, '4.405')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:31,033][626795] Updated weights for policy 0, policy_version 353082 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:33,135][626795] Updated weights for policy 0, policy_version 353092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:33,976][24592] Fps is (10 sec: 39320.2, 60 sec: 34679.3, 300 sec: 38349.6). Total num frames: 2892554240. Throughput: 0: 9340.9. Samples: 473129196. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:33,978][24592] Avg episode reward: [(0, '4.732')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:35,625][626795] Updated weights for policy 0, policy_version 353102 (0.0030)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:37,729][626795] Updated weights for policy 0, policy_version 353112 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:38,976][24592] Fps is (10 sec: 37679.9, 60 sec: 34542.4, 300 sec: 38238.5). Total num frames: 2892742656. Throughput: 0: 9431.9. Samples: 473184804. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:38,978][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:39,543][626795] Updated weights for policy 0, policy_version 353122 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:41,653][626795] Updated weights for policy 0, policy_version 353132 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:43,618][626795] Updated weights for policy 0, policy_version 353142 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:43,976][24592] Fps is (10 sec: 39322.6, 60 sec: 36317.8, 300 sec: 38377.4). Total num frames: 2892947456. Throughput: 0: 9488.4. Samples: 473215296. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:43,976][24592] Avg episode reward: [(0, '4.753')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:45,633][626795] Updated weights for policy 0, policy_version 353152 (0.0037)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:48,600][626795] Updated weights for policy 0, policy_version 353162 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:48,976][24592] Fps is (10 sec: 36866.8, 60 sec: 37410.1, 300 sec: 38210.9). Total num frames: 2893111296. Throughput: 0: 9344.1. Samples: 473266530. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:48,980][24592] Avg episode reward: [(0, '4.528')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:50,614][626795] Updated weights for policy 0, policy_version 353172 (0.0027)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:52,524][626795] Updated weights for policy 0, policy_version 353182 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:53,975][24592] Fps is (10 sec: 37683.3, 60 sec: 37956.2, 300 sec: 38238.6). Total num frames: 2893324288. Throughput: 0: 9459.9. Samples: 473329668. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:53,976][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:54,517][626795] Updated weights for policy 0, policy_version 353192 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:56,510][626795] Updated weights for policy 0, policy_version 353202 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:54:58,436][626795] Updated weights for policy 0, policy_version 353212 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:58,975][24592] Fps is (10 sec: 42599.0, 60 sec: 38229.4, 300 sec: 38238.6). Total num frames: 2893537280. Throughput: 0: 9542.0. Samples: 473360148. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:54:58,977][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:00,386][626795] Updated weights for policy 0, policy_version 353222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:02,330][626795] Updated weights for policy 0, policy_version 353232 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:03,975][24592] Fps is (10 sec: 41779.6, 60 sec: 38639.0, 300 sec: 38210.8). Total num frames: 2893742080. Throughput: 0: 9852.9. Samples: 473422764. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:03,977][24592] Avg episode reward: [(0, '4.628')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000353240_2893742080.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:04,131][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000352124_2884599808.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:04,347][626795] Updated weights for policy 0, policy_version 353242 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:06,347][626795] Updated weights for policy 0, policy_version 353252 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:08,181][626795] Updated weights for policy 0, policy_version 353262 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:08,975][24592] Fps is (10 sec: 40959.7, 60 sec: 38775.9, 300 sec: 38183.0). Total num frames: 2893946880. Throughput: 0: 9932.5. Samples: 473485572. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:08,976][24592] Avg episode reward: [(0, '4.808')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:10,252][626795] Updated weights for policy 0, policy_version 353272 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:12,201][626795] Updated weights for policy 0, policy_version 353282 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:13,975][24592] Fps is (10 sec: 41779.2, 60 sec: 39867.7, 300 sec: 38238.6). Total num frames: 2894159872. Throughput: 0: 9936.0. Samples: 473516250. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:13,977][24592] Avg episode reward: [(0, '4.776')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:14,147][626795] Updated weights for policy 0, policy_version 353292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:16,140][626795] Updated weights for policy 0, policy_version 353302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:18,397][626795] Updated weights for policy 0, policy_version 353312 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:19,698][24592] Fps is (10 sec: 37435.8, 60 sec: 39393.2, 300 sec: 38228.3). Total num frames: 2894348288. Throughput: 0: 9755.8. Samples: 473575254. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:19,699][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:21,293][626795] Updated weights for policy 0, policy_version 353322 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:23,347][626795] Updated weights for policy 0, policy_version 353332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:23,975][24592] Fps is (10 sec: 36044.9, 60 sec: 39321.6, 300 sec: 38210.8). Total num frames: 2894520320. Throughput: 0: 9841.7. Samples: 473627670. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:23,976][24592] Avg episode reward: [(0, '4.565')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:25,358][626795] Updated weights for policy 0, policy_version 353342 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:27,439][626795] Updated weights for policy 0, policy_version 353352 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:28,977][24592] Fps is (10 sec: 39729.6, 60 sec: 39184.2, 300 sec: 38210.6). Total num frames: 2894716928. Throughput: 0: 9799.5. Samples: 473656284. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:28,978][24592] Avg episode reward: [(0, '4.858')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:29,599][626795] Updated weights for policy 0, policy_version 353362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:31,520][626795] Updated weights for policy 0, policy_version 353372 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:33,518][626795] Updated weights for policy 0, policy_version 353382 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:33,975][24592] Fps is (10 sec: 40140.5, 60 sec: 39458.4, 300 sec: 38238.6). Total num frames: 2894921728. Throughput: 0: 10031.4. Samples: 473717940. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:33,977][24592] Avg episode reward: [(0, '4.693')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:35,539][626795] Updated weights for policy 0, policy_version 353392 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:37,409][626795] Updated weights for policy 0, policy_version 353402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:38,975][24592] Fps is (10 sec: 40965.6, 60 sec: 39731.8, 300 sec: 38321.9). Total num frames: 2895126528. Throughput: 0: 10021.9. Samples: 473780652. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:38,978][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:39,454][626795] Updated weights for policy 0, policy_version 353412 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:41,283][626795] Updated weights for policy 0, policy_version 353422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:43,353][626795] Updated weights for policy 0, policy_version 353432 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:43,976][24592] Fps is (10 sec: 41778.3, 60 sec: 39867.7, 300 sec: 38405.3). Total num frames: 2895339520. Throughput: 0: 10038.5. Samples: 473811882. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:43,978][24592] Avg episode reward: [(0, '4.737')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:45,272][626795] Updated weights for policy 0, policy_version 353442 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:47,336][626795] Updated weights for policy 0, policy_version 353452 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:48,980][24592] Fps is (10 sec: 41761.6, 60 sec: 40547.7, 300 sec: 38432.4). Total num frames: 2895544320. Throughput: 0: 10025.5. Samples: 473873952. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:48,981][24592] Avg episode reward: [(0, '4.696')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:49,336][626795] Updated weights for policy 0, policy_version 353462 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:51,351][626795] Updated weights for policy 0, policy_version 353472 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:53,975][24592] Fps is (10 sec: 37684.1, 60 sec: 39867.8, 300 sec: 38405.2). Total num frames: 2895716352. Throughput: 0: 9818.6. Samples: 473927406. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:53,977][24592] Avg episode reward: [(0, '4.561')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:53,987][626795] Updated weights for policy 0, policy_version 353482 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:56,078][626795] Updated weights for policy 0, policy_version 353492 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:55:58,039][626795] Updated weights for policy 0, policy_version 353502 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:58,975][24592] Fps is (10 sec: 37699.1, 60 sec: 39731.2, 300 sec: 38377.4). Total num frames: 2895921152. Throughput: 0: 9813.1. Samples: 473957838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:55:58,978][24592] Avg episode reward: [(0, '4.937')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:00,137][626795] Updated weights for policy 0, policy_version 353512 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:02,123][626795] Updated weights for policy 0, policy_version 353522 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:03,976][24592] Fps is (10 sec: 40958.6, 60 sec: 39731.0, 300 sec: 38433.0). Total num frames: 2896125952. Throughput: 0: 10029.7. Samples: 474019344. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:03,977][24592] Avg episode reward: [(0, '4.805')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:04,027][626795] Updated weights for policy 0, policy_version 353532 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:06,155][626795] Updated weights for policy 0, policy_version 353542 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:08,128][626795] Updated weights for policy 0, policy_version 353552 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:08,975][24592] Fps is (10 sec: 40959.7, 60 sec: 39731.2, 300 sec: 38460.7). Total num frames: 2896330752. Throughput: 0: 10051.0. Samples: 474079968. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:08,978][24592] Avg episode reward: [(0, '4.841')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:10,168][626795] Updated weights for policy 0, policy_version 353562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:12,167][626795] Updated weights for policy 0, policy_version 353572 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:13,975][24592] Fps is (10 sec: 40142.2, 60 sec: 39458.1, 300 sec: 38433.0). Total num frames: 2896527360. Throughput: 0: 10104.3. Samples: 474110964. Policy #0 lag: (min: 0.0, avg: 2.1, max: 4.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:13,978][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:14,223][626795] Updated weights for policy 0, policy_version 353582 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:16,226][626795] Updated weights for policy 0, policy_version 353592 (0.0024)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:18,235][626795] Updated weights for policy 0, policy_version 353602 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:18,975][24592] Fps is (10 sec: 40960.4, 60 sec: 40353.8, 300 sec: 38460.7). Total num frames: 2896740352. Throughput: 0: 10081.5. Samples: 474171606. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:18,977][24592] Avg episode reward: [(0, '4.932')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:20,198][626795] Updated weights for policy 0, policy_version 353612 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:22,227][626795] Updated weights for policy 0, policy_version 353622 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:23,975][24592] Fps is (10 sec: 40959.6, 60 sec: 40277.3, 300 sec: 38544.0). Total num frames: 2896936960. Throughput: 0: 10054.3. Samples: 474233094. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:23,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:24,248][626795] Updated weights for policy 0, policy_version 353632 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:27,081][626795] Updated weights for policy 0, policy_version 353642 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:28,976][24592] Fps is (10 sec: 36863.0, 60 sec: 39868.5, 300 sec: 38432.9). Total num frames: 2897108992. Throughput: 0: 9842.3. Samples: 474254784. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:28,978][24592] Avg episode reward: [(0, '4.595')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:29,069][626795] Updated weights for policy 0, policy_version 353652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:31,028][626795] Updated weights for policy 0, policy_version 353662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:32,846][626795] Updated weights for policy 0, policy_version 353672 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:33,975][24592] Fps is (10 sec: 38502.8, 60 sec: 40004.3, 300 sec: 38405.2). Total num frames: 2897321984. Throughput: 0: 9865.5. Samples: 474317856. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:33,976][24592] Avg episode reward: [(0, '4.675')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:34,921][626795] Updated weights for policy 0, policy_version 353682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:36,806][626795] Updated weights for policy 0, policy_version 353692 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:38,844][626795] Updated weights for policy 0, policy_version 353702 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:38,975][24592] Fps is (10 sec: 41779.9, 60 sec: 40004.2, 300 sec: 38377.5). Total num frames: 2897526784. Throughput: 0: 10082.5. Samples: 474381120. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:38,978][24592] Avg episode reward: [(0, '4.759')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:40,760][626795] Updated weights for policy 0, policy_version 353712 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:42,682][626795] Updated weights for policy 0, policy_version 353722 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:43,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40004.4, 300 sec: 38377.4). Total num frames: 2897739776. Throughput: 0: 10094.5. Samples: 474412092. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:43,976][24592] Avg episode reward: [(0, '4.527')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:44,594][626795] Updated weights for policy 0, policy_version 353732 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:46,515][626795] Updated weights for policy 0, policy_version 353742 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:48,583][626795] Updated weights for policy 0, policy_version 353752 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:48,975][24592] Fps is (10 sec: 42599.0, 60 sec: 40143.7, 300 sec: 38377.4). Total num frames: 2897952768. Throughput: 0: 10125.8. Samples: 474475002. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:48,978][24592] Avg episode reward: [(0, '4.793')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:50,646][626795] Updated weights for policy 0, policy_version 353762 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:52,462][626795] Updated weights for policy 0, policy_version 353772 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:53,976][24592] Fps is (10 sec: 42598.3, 60 sec: 40823.4, 300 sec: 38433.0). Total num frames: 2898165760. Throughput: 0: 10182.7. Samples: 474538188. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:53,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:54,517][626795] Updated weights for policy 0, policy_version 353782 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:56,341][626795] Updated weights for policy 0, policy_version 353792 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:56:58,330][626795] Updated weights for policy 0, policy_version 353802 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:58,975][24592] Fps is (10 sec: 41778.6, 60 sec: 40823.4, 300 sec: 38516.3). Total num frames: 2898370560. Throughput: 0: 10196.0. Samples: 474569784. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:56:58,977][24592] Avg episode reward: [(0, '4.839')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:00,312][626795] Updated weights for policy 0, policy_version 353812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:02,330][626795] Updated weights for policy 0, policy_version 353822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40960.2, 300 sec: 38544.0). Total num frames: 2898583552. Throughput: 0: 10228.7. Samples: 474631896. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:03,978][24592] Avg episode reward: [(0, '4.731')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:03,982][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000353831_2898583552.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:04,149][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000352704_2889351168.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:04,196][626795] Updated weights for policy 0, policy_version 353832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:06,315][626795] Updated weights for policy 0, policy_version 353842 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:08,140][626795] Updated weights for policy 0, policy_version 353852 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:08,975][24592] Fps is (10 sec: 41779.6, 60 sec: 40960.1, 300 sec: 38488.5). Total num frames: 2898788352. Throughput: 0: 10245.4. Samples: 474694134. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:08,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:10,246][626795] Updated weights for policy 0, policy_version 353862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:12,211][626795] Updated weights for policy 0, policy_version 353872 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:14,538][24592] Fps is (10 sec: 35675.7, 60 sec: 40173.6, 300 sec: 38276.7). Total num frames: 2898960384. Throughput: 0: 10324.0. Samples: 474725172. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:14,539][24592] Avg episode reward: [(0, '4.676')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:15,334][626795] Updated weights for policy 0, policy_version 353882 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:17,334][626795] Updated weights for policy 0, policy_version 353892 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:18,975][24592] Fps is (10 sec: 36044.8, 60 sec: 40140.8, 300 sec: 38266.4). Total num frames: 2899148800. Throughput: 0: 10160.9. Samples: 474775098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:18,978][24592] Avg episode reward: [(0, '4.928')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:19,277][626795] Updated weights for policy 0, policy_version 353902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:21,203][626795] Updated weights for policy 0, policy_version 353912 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:23,141][626795] Updated weights for policy 0, policy_version 353922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:23,975][24592] Fps is (10 sec: 42534.5, 60 sec: 40413.9, 300 sec: 38433.0). Total num frames: 2899361792. Throughput: 0: 10151.5. Samples: 474837936. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:23,976][24592] Avg episode reward: [(0, '4.774')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:25,151][626795] Updated weights for policy 0, policy_version 353932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:27,111][626795] Updated weights for policy 0, policy_version 353942 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40960.2, 300 sec: 38577.4). Total num frames: 2899566592. Throughput: 0: 10170.1. Samples: 474869748. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:28,977][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:29,090][626795] Updated weights for policy 0, policy_version 353952 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:30,990][626795] Updated weights for policy 0, policy_version 353962 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:32,906][626795] Updated weights for policy 0, policy_version 353972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:33,975][24592] Fps is (10 sec: 42598.3, 60 sec: 41096.5, 300 sec: 38712.1). Total num frames: 2899787776. Throughput: 0: 10170.9. Samples: 474932694. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:33,976][24592] Avg episode reward: [(0, '4.353')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:34,813][626795] Updated weights for policy 0, policy_version 353982 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:36,730][626795] Updated weights for policy 0, policy_version 353992 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:38,705][626795] Updated weights for policy 0, policy_version 354002 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:38,976][24592] Fps is (10 sec: 42596.9, 60 sec: 41096.4, 300 sec: 38793.9). Total num frames: 2899992576. Throughput: 0: 10189.0. Samples: 474996696. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:38,977][24592] Avg episode reward: [(0, '4.519')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:40,672][626795] Updated weights for policy 0, policy_version 354012 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:42,497][626795] Updated weights for policy 0, policy_version 354022 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:43,976][24592] Fps is (10 sec: 41777.6, 60 sec: 41096.3, 300 sec: 38905.0). Total num frames: 2900205568. Throughput: 0: 10188.9. Samples: 475028286. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:43,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:44,614][626795] Updated weights for policy 0, policy_version 354032 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:46,585][626795] Updated weights for policy 0, policy_version 354042 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:48,975][24592] Fps is (10 sec: 38503.7, 60 sec: 40413.8, 300 sec: 38794.0). Total num frames: 2900377600. Throughput: 0: 10014.0. Samples: 475082526. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:48,977][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:49,405][626795] Updated weights for policy 0, policy_version 354052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:51,261][626795] Updated weights for policy 0, policy_version 354062 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:53,063][626795] Updated weights for policy 0, policy_version 354072 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:53,976][24592] Fps is (10 sec: 38501.6, 60 sec: 40413.5, 300 sec: 38821.7). Total num frames: 2900590592. Throughput: 0: 10026.3. Samples: 475145322. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:53,978][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:55,172][626795] Updated weights for policy 0, policy_version 354082 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:57,134][626795] Updated weights for policy 0, policy_version 354092 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:58,975][24592] Fps is (10 sec: 42598.1, 60 sec: 40550.4, 300 sec: 38821.7). Total num frames: 2900803584. Throughput: 0: 10160.8. Samples: 475176690. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:57:58,977][24592] Avg episode reward: [(0, '4.438')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:57:58,977][626795] Updated weights for policy 0, policy_version 354102 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:00,915][626795] Updated weights for policy 0, policy_version 354112 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:02,916][626795] Updated weights for policy 0, policy_version 354122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:03,976][24592] Fps is (10 sec: 41780.9, 60 sec: 40413.8, 300 sec: 39016.3). Total num frames: 2901008384. Throughput: 0: 10333.0. Samples: 475240086. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:03,978][24592] Avg episode reward: [(0, '4.832')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:04,795][626795] Updated weights for policy 0, policy_version 354132 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:06,772][626795] Updated weights for policy 0, policy_version 354142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:08,696][626795] Updated weights for policy 0, policy_version 354152 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:08,975][24592] Fps is (10 sec: 40960.2, 60 sec: 40413.8, 300 sec: 39016.2). Total num frames: 2901213184. Throughput: 0: 10336.7. Samples: 475303086. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:08,976][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:10,700][626795] Updated weights for policy 0, policy_version 354162 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:12,647][626795] Updated weights for policy 0, policy_version 354172 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:13,975][24592] Fps is (10 sec: 42598.9, 60 sec: 41623.4, 300 sec: 39043.9). Total num frames: 2901434368. Throughput: 0: 10324.4. Samples: 475334346. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:13,978][24592] Avg episode reward: [(0, '4.859')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:14,618][626795] Updated weights for policy 0, policy_version 354182 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:16,583][626795] Updated weights for policy 0, policy_version 354192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:18,474][626795] Updated weights for policy 0, policy_version 354202 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:18,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.6, 300 sec: 39016.1). Total num frames: 2901630976. Throughput: 0: 10334.9. Samples: 475397766. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:18,976][24592] Avg episode reward: [(0, '4.572')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:21,294][626795] Updated weights for policy 0, policy_version 354212 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:23,235][626795] Updated weights for policy 0, policy_version 354222 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:23,975][24592] Fps is (10 sec: 37683.5, 60 sec: 40823.5, 300 sec: 39071.7). Total num frames: 2901811200. Throughput: 0: 10103.5. Samples: 475451352. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:23,976][24592] Avg episode reward: [(0, '4.619')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:25,280][626795] Updated weights for policy 0, policy_version 354232 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:27,192][626795] Updated weights for policy 0, policy_version 354242 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:28,976][24592] Fps is (10 sec: 39320.8, 60 sec: 40959.9, 300 sec: 39155.0). Total num frames: 2902024192. Throughput: 0: 10109.6. Samples: 475483218. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:28,977][24592] Avg episode reward: [(0, '4.716')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:29,162][626795] Updated weights for policy 0, policy_version 354252 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:31,077][626795] Updated weights for policy 0, policy_version 354262 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:32,956][626795] Updated weights for policy 0, policy_version 354272 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:33,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40686.9, 300 sec: 39182.8). Total num frames: 2902228992. Throughput: 0: 10302.5. Samples: 475546140. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:33,977][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:34,993][626795] Updated weights for policy 0, policy_version 354282 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:36,906][626795] Updated weights for policy 0, policy_version 354292 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:38,854][626795] Updated weights for policy 0, policy_version 354302 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:38,976][24592] Fps is (10 sec: 41777.1, 60 sec: 40823.2, 300 sec: 39571.4). Total num frames: 2902441984. Throughput: 0: 10308.9. Samples: 475609224. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:38,981][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:40,821][626795] Updated weights for policy 0, policy_version 354312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:42,531][626795] Updated weights for policy 0, policy_version 354322 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:43,975][24592] Fps is (10 sec: 43417.5, 60 sec: 40960.2, 300 sec: 39988.1). Total num frames: 2902663168. Throughput: 0: 10323.5. Samples: 475641246. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:43,977][24592] Avg episode reward: [(0, '4.534')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:44,609][626795] Updated weights for policy 0, policy_version 354332 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:46,638][626795] Updated weights for policy 0, policy_version 354342 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:48,580][626795] Updated weights for policy 0, policy_version 354352 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:48,975][24592] Fps is (10 sec: 42601.4, 60 sec: 41506.1, 300 sec: 40071.4). Total num frames: 2902867968. Throughput: 0: 10311.0. Samples: 475704078. Policy #0 lag: (min: 0.0, avg: 1.8, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:48,976][24592] Avg episode reward: [(0, '4.848')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:50,541][626795] Updated weights for policy 0, policy_version 354362 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:52,450][626795] Updated weights for policy 0, policy_version 354372 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:53,976][24592] Fps is (10 sec: 37682.8, 60 sec: 40823.8, 300 sec: 39988.1). Total num frames: 2903040000. Throughput: 0: 10163.2. Samples: 475760430. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:53,976][24592] Avg episode reward: [(0, '4.745')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:55,354][626795] Updated weights for policy 0, policy_version 354382 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:57,143][626795] Updated weights for policy 0, policy_version 354392 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:58,975][24592] Fps is (10 sec: 38502.3, 60 sec: 40823.5, 300 sec: 40099.2). Total num frames: 2903252992. Throughput: 0: 10115.7. Samples: 475789554. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:58:58,978][24592] Avg episode reward: [(0, '4.518')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:58:59,221][626795] Updated weights for policy 0, policy_version 354402 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:01,187][626795] Updated weights for policy 0, policy_version 354412 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:02,962][626795] Updated weights for policy 0, policy_version 354422 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:03,975][24592] Fps is (10 sec: 41779.6, 60 sec: 40823.6, 300 sec: 40127.0). Total num frames: 2903457792. Throughput: 0: 10113.3. Samples: 475852866. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:03,977][24592] Avg episode reward: [(0, '4.838')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000354426_2903457792.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:04,070][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000353240_2893742080.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:05,057][626795] Updated weights for policy 0, policy_version 354432 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:07,069][626795] Updated weights for policy 0, policy_version 354442 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:08,975][24592] Fps is (10 sec: 40959.9, 60 sec: 40823.5, 300 sec: 40321.3). Total num frames: 2903662592. Throughput: 0: 10303.1. Samples: 475914990. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:08,976][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:08,993][626795] Updated weights for policy 0, policy_version 354452 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:10,993][626795] Updated weights for policy 0, policy_version 354462 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:12,934][626795] Updated weights for policy 0, policy_version 354472 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:13,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40686.9, 300 sec: 40404.6). Total num frames: 2903875584. Throughput: 0: 10274.0. Samples: 475945548. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:13,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:14,913][626795] Updated weights for policy 0, policy_version 354482 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:16,875][626795] Updated weights for policy 0, policy_version 354492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:18,782][626795] Updated weights for policy 0, policy_version 354502 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:18,975][24592] Fps is (10 sec: 42598.5, 60 sec: 40960.0, 300 sec: 40432.4). Total num frames: 2904088576. Throughput: 0: 10285.5. Samples: 476008986. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:18,977][24592] Avg episode reward: [(0, '4.993')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:20,801][626795] Updated weights for policy 0, policy_version 354512 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:22,628][626795] Updated weights for policy 0, policy_version 354522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:23,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41369.6, 300 sec: 40432.4). Total num frames: 2904293376. Throughput: 0: 10275.4. Samples: 476071608. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:23,977][24592] Avg episode reward: [(0, '4.526')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:24,657][626795] Updated weights for policy 0, policy_version 354532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:27,439][626795] Updated weights for policy 0, policy_version 354542 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:28,975][24592] Fps is (10 sec: 38502.5, 60 sec: 40823.6, 300 sec: 40404.7). Total num frames: 2904473600. Throughput: 0: 10122.4. Samples: 476096754. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:28,976][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:29,498][626795] Updated weights for policy 0, policy_version 354552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:31,395][626795] Updated weights for policy 0, policy_version 354562 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:33,273][626795] Updated weights for policy 0, policy_version 354572 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:33,976][24592] Fps is (10 sec: 38499.5, 60 sec: 40823.0, 300 sec: 40460.2). Total num frames: 2904678400. Throughput: 0: 10067.3. Samples: 476157114. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:33,977][24592] Avg episode reward: [(0, '4.691')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:35,237][626795] Updated weights for policy 0, policy_version 354582 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:37,196][626795] Updated weights for policy 0, policy_version 354592 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:38,975][24592] Fps is (10 sec: 41779.0, 60 sec: 40823.9, 300 sec: 40487.9). Total num frames: 2904891392. Throughput: 0: 10238.3. Samples: 476221152. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:38,977][24592] Avg episode reward: [(0, '4.708')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:39,120][626795] Updated weights for policy 0, policy_version 354602 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:41,087][626795] Updated weights for policy 0, policy_version 354612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:42,978][626795] Updated weights for policy 0, policy_version 354622 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:43,975][24592] Fps is (10 sec: 42601.7, 60 sec: 40687.0, 300 sec: 40654.6). Total num frames: 2905104384. Throughput: 0: 10289.9. Samples: 476252598. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:43,978][24592] Avg episode reward: [(0, '4.582')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:44,905][626795] Updated weights for policy 0, policy_version 354632 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:46,808][626795] Updated weights for policy 0, policy_version 354642 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:48,860][626795] Updated weights for policy 0, policy_version 354652 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:48,976][24592] Fps is (10 sec: 42598.0, 60 sec: 40823.4, 300 sec: 40654.5). Total num frames: 2905317376. Throughput: 0: 10288.4. Samples: 476315844. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:48,978][24592] Avg episode reward: [(0, '4.679')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:50,720][626795] Updated weights for policy 0, policy_version 354662 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:52,687][626795] Updated weights for policy 0, policy_version 354672 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:53,975][24592] Fps is (10 sec: 41778.7, 60 sec: 41369.6, 300 sec: 40626.8). Total num frames: 2905522176. Throughput: 0: 10313.5. Samples: 476379096. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:53,977][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:54,686][626795] Updated weights for policy 0, policy_version 354682 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:56,557][626795] Updated weights for policy 0, policy_version 354692 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 20:59:58,575][626795] Updated weights for policy 0, policy_version 354702 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:58,975][24592] Fps is (10 sec: 41780.0, 60 sec: 41369.6, 300 sec: 40654.5). Total num frames: 2905735168. Throughput: 0: 10335.8. Samples: 476410656. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 20:59:58,977][24592] Avg episode reward: [(0, '4.897')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:01,323][626795] Updated weights for policy 0, policy_version 354712 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:03,227][626795] Updated weights for policy 0, policy_version 354722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:03,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40823.4, 300 sec: 40543.5). Total num frames: 2905907200. Throughput: 0: 10125.9. Samples: 476464650. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:03,976][24592] Avg episode reward: [(0, '4.685')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:05,216][626795] Updated weights for policy 0, policy_version 354732 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:07,233][626795] Updated weights for policy 0, policy_version 354742 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:08,976][24592] Fps is (10 sec: 38499.6, 60 sec: 40959.6, 300 sec: 40543.4). Total num frames: 2906120192. Throughput: 0: 10143.6. Samples: 476528076. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:08,978][24592] Avg episode reward: [(0, '4.773')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:09,023][626795] Updated weights for policy 0, policy_version 354752 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:11,040][626795] Updated weights for policy 0, policy_version 354762 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:13,040][626795] Updated weights for policy 0, policy_version 354772 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:13,975][24592] Fps is (10 sec: 41779.5, 60 sec: 40823.5, 300 sec: 40698.7). Total num frames: 2906324992. Throughput: 0: 10282.3. Samples: 476559456. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:13,976][24592] Avg episode reward: [(0, '4.552')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:14,955][626795] Updated weights for policy 0, policy_version 354782 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:16,890][626795] Updated weights for policy 0, policy_version 354792 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:18,920][626795] Updated weights for policy 0, policy_version 354802 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:18,975][24592] Fps is (10 sec: 41782.0, 60 sec: 40823.5, 300 sec: 40737.8). Total num frames: 2906537984. Throughput: 0: 10336.3. Samples: 476622240. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:18,977][24592] Avg episode reward: [(0, '4.699')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:20,918][626795] Updated weights for policy 0, policy_version 354812 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:22,697][626795] Updated weights for policy 0, policy_version 354822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:23,976][24592] Fps is (10 sec: 42595.9, 60 sec: 40959.6, 300 sec: 40793.5). Total num frames: 2906750976. Throughput: 0: 10318.3. Samples: 476685480. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:23,978][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:24,749][626795] Updated weights for policy 0, policy_version 354832 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:26,595][626795] Updated weights for policy 0, policy_version 354842 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:28,562][626795] Updated weights for policy 0, policy_version 354852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:28,978][24592] Fps is (10 sec: 42588.5, 60 sec: 41504.5, 300 sec: 40820.8). Total num frames: 2906963968. Throughput: 0: 10322.5. Samples: 476717136. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:28,979][24592] Avg episode reward: [(0, '4.577')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:30,584][626795] Updated weights for policy 0, policy_version 354862 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:33,305][626795] Updated weights for policy 0, policy_version 354872 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:33,976][24592] Fps is (10 sec: 38502.4, 60 sec: 40960.1, 300 sec: 40710.0). Total num frames: 2907136000. Throughput: 0: 10234.8. Samples: 476776416. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:33,977][24592] Avg episode reward: [(0, '4.715')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:35,309][626795] Updated weights for policy 0, policy_version 354882 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:37,363][626795] Updated weights for policy 0, policy_version 354892 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:38,975][24592] Fps is (10 sec: 37692.0, 60 sec: 40823.5, 300 sec: 40682.3). Total num frames: 2907340800. Throughput: 0: 10107.8. Samples: 476833944. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:38,976][24592] Avg episode reward: [(0, '4.743')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:39,260][626795] Updated weights for policy 0, policy_version 354902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:41,239][626795] Updated weights for policy 0, policy_version 354912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:43,045][626795] Updated weights for policy 0, policy_version 354922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:43,976][24592] Fps is (10 sec: 41779.4, 60 sec: 40823.1, 300 sec: 40710.6). Total num frames: 2907553792. Throughput: 0: 10084.9. Samples: 476864484. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:43,977][24592] Avg episode reward: [(0, '4.567')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:45,129][626795] Updated weights for policy 0, policy_version 354932 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:46,993][626795] Updated weights for policy 0, policy_version 354942 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:48,953][626795] Updated weights for policy 0, policy_version 354952 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:48,976][24592] Fps is (10 sec: 42597.3, 60 sec: 40823.4, 300 sec: 40848.9). Total num frames: 2907766784. Throughput: 0: 10296.1. Samples: 476927976. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:48,977][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:50,968][626795] Updated weights for policy 0, policy_version 354962 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:52,945][626795] Updated weights for policy 0, policy_version 354972 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:53,975][24592] Fps is (10 sec: 41781.5, 60 sec: 40823.5, 300 sec: 40848.9). Total num frames: 2907971584. Throughput: 0: 10291.5. Samples: 476991186. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:53,977][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:54,956][626795] Updated weights for policy 0, policy_version 354982 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:56,886][626795] Updated weights for policy 0, policy_version 354992 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:00:58,782][626795] Updated weights for policy 0, policy_version 355002 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:58,975][24592] Fps is (10 sec: 40961.1, 60 sec: 40686.9, 300 sec: 40849.0). Total num frames: 2908176384. Throughput: 0: 10282.3. Samples: 477022158. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:00:58,977][24592] Avg episode reward: [(0, '4.750')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:00,734][626795] Updated weights for policy 0, policy_version 355012 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:02,675][626795] Updated weights for policy 0, policy_version 355022 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:03,975][24592] Fps is (10 sec: 41779.3, 60 sec: 41369.7, 300 sec: 40876.7). Total num frames: 2908389376. Throughput: 0: 10288.5. Samples: 477085224. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:03,976][24592] Avg episode reward: [(0, '4.713')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000355028_2908389376.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:04,112][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000353831_2898583552.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:04,714][626795] Updated weights for policy 0, policy_version 355032 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:07,560][626795] Updated weights for policy 0, policy_version 355042 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:08,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40687.4, 300 sec: 40793.4). Total num frames: 2908561408. Throughput: 0: 10069.9. Samples: 477138618. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:08,976][24592] Avg episode reward: [(0, '4.845')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:09,472][626795] Updated weights for policy 0, policy_version 355052 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:11,400][626795] Updated weights for policy 0, policy_version 355062 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:13,408][626795] Updated weights for policy 0, policy_version 355072 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:13,976][24592] Fps is (10 sec: 37682.2, 60 sec: 40686.8, 300 sec: 40765.6). Total num frames: 2908766208. Throughput: 0: 10058.9. Samples: 477169764. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:13,977][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:15,416][626795] Updated weights for policy 0, policy_version 355082 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:17,326][626795] Updated weights for policy 0, policy_version 355092 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:18,977][24592] Fps is (10 sec: 41773.3, 60 sec: 40686.0, 300 sec: 40821.0). Total num frames: 2908979200. Throughput: 0: 10135.4. Samples: 477232518. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:18,979][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:19,349][626795] Updated weights for policy 0, policy_version 355102 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:21,202][626795] Updated weights for policy 0, policy_version 355112 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:23,127][626795] Updated weights for policy 0, policy_version 355122 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:23,976][24592] Fps is (10 sec: 42597.6, 60 sec: 40687.0, 300 sec: 40960.0). Total num frames: 2909192192. Throughput: 0: 10250.7. Samples: 477295230. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:23,978][24592] Avg episode reward: [(0, '4.768')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:25,161][626795] Updated weights for policy 0, policy_version 355132 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:27,215][626795] Updated weights for policy 0, policy_version 355142 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:28,976][24592] Fps is (10 sec: 42603.2, 60 sec: 40688.3, 300 sec: 40960.0). Total num frames: 2909405184. Throughput: 0: 10268.9. Samples: 477326580. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:28,976][626795] Updated weights for policy 0, policy_version 355152 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:28,977][24592] Avg episode reward: [(0, '4.398')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:31,244][626795] Updated weights for policy 0, policy_version 355162 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:33,218][626795] Updated weights for policy 0, policy_version 355172 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:33,975][24592] Fps is (10 sec: 40961.5, 60 sec: 41096.9, 300 sec: 40932.2). Total num frames: 2909601792. Throughput: 0: 10209.1. Samples: 477387384. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:33,977][24592] Avg episode reward: [(0, '4.757')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:35,207][626795] Updated weights for policy 0, policy_version 355182 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:36,997][626795] Updated weights for policy 0, policy_version 355192 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:39,197][24592] Fps is (10 sec: 37668.7, 60 sec: 40673.2, 300 sec: 40818.3). Total num frames: 2909790208. Throughput: 0: 9448.3. Samples: 477418452. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:39,198][24592] Avg episode reward: [(0, '4.456')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:39,929][626795] Updated weights for policy 0, policy_version 355202 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:40,705][626772] Signal inference workers to stop experience collection... (6100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:40,707][626772] Signal inference workers to resume experience collection... (6100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:40,720][626795] InferenceWorker_p0-w0: stopping experience collection (6100 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:40,728][626795] InferenceWorker_p0-w0: resuming experience collection (6100 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:41,825][626795] Updated weights for policy 0, policy_version 355212 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:43,870][626795] Updated weights for policy 0, policy_version 355222 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:43,975][24592] Fps is (10 sec: 38502.5, 60 sec: 40550.7, 300 sec: 40793.4). Total num frames: 2909986816. Throughput: 0: 10015.2. Samples: 477472842. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:43,977][24592] Avg episode reward: [(0, '4.725')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:45,619][626795] Updated weights for policy 0, policy_version 355232 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:47,618][626795] Updated weights for policy 0, policy_version 355242 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:48,975][24592] Fps is (10 sec: 41050.6, 60 sec: 40414.0, 300 sec: 40765.6). Total num frames: 2910191616. Throughput: 0: 10027.5. Samples: 477536460. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:48,977][24592] Avg episode reward: [(0, '4.760')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:49,716][626795] Updated weights for policy 0, policy_version 355252 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:51,562][626795] Updated weights for policy 0, policy_version 355262 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:53,522][626795] Updated weights for policy 0, policy_version 355272 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:53,985][24592] Fps is (10 sec: 41740.9, 60 sec: 40544.2, 300 sec: 40792.1). Total num frames: 2910404608. Throughput: 0: 10234.4. Samples: 477599262. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:53,986][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:55,497][626795] Updated weights for policy 0, policy_version 355282 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:57,427][626795] Updated weights for policy 0, policy_version 355292 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:58,975][24592] Fps is (10 sec: 42598.5, 60 sec: 40686.9, 300 sec: 40793.4). Total num frames: 2910617600. Throughput: 0: 10232.7. Samples: 477630234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:01:58,977][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:01:59,444][626795] Updated weights for policy 0, policy_version 355302 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:01,337][626795] Updated weights for policy 0, policy_version 355312 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:03,293][626795] Updated weights for policy 0, policy_version 355322 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:03,975][24592] Fps is (10 sec: 41817.8, 60 sec: 40550.4, 300 sec: 40793.4). Total num frames: 2910822400. Throughput: 0: 10237.5. Samples: 477693192. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:03,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:05,351][626795] Updated weights for policy 0, policy_version 355332 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:07,222][626795] Updated weights for policy 0, policy_version 355342 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:08,975][24592] Fps is (10 sec: 41778.9, 60 sec: 41233.0, 300 sec: 41010.5). Total num frames: 2911035392. Throughput: 0: 10240.6. Samples: 477756054. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:08,978][24592] Avg episode reward: [(0, '4.719')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:09,232][626795] Updated weights for policy 0, policy_version 355352 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:11,230][626795] Updated weights for policy 0, policy_version 355362 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:13,933][626795] Updated weights for policy 0, policy_version 355372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:13,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40687.1, 300 sec: 40876.7). Total num frames: 2911207424. Throughput: 0: 10207.8. Samples: 477785928. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:13,976][24592] Avg episode reward: [(0, '4.703')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:16,029][626795] Updated weights for policy 0, policy_version 355382 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:17,787][626795] Updated weights for policy 0, policy_version 355392 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:18,975][24592] Fps is (10 sec: 38502.6, 60 sec: 40687.9, 300 sec: 40876.7). Total num frames: 2911420416. Throughput: 0: 10097.4. Samples: 477841764. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:18,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:19,834][626795] Updated weights for policy 0, policy_version 355402 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:21,725][626795] Updated weights for policy 0, policy_version 355412 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:23,721][626795] Updated weights for policy 0, policy_version 355422 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:23,975][24592] Fps is (10 sec: 41779.1, 60 sec: 40550.7, 300 sec: 40876.7). Total num frames: 2911625216. Throughput: 0: 10861.2. Samples: 477904800. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:23,984][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:25,614][626795] Updated weights for policy 0, policy_version 355432 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:27,538][626795] Updated weights for policy 0, policy_version 355442 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:28,975][24592] Fps is (10 sec: 41779.2, 60 sec: 40550.6, 300 sec: 40848.9). Total num frames: 2911838208. Throughput: 0: 10297.6. Samples: 477936234. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:28,976][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:29,611][626795] Updated weights for policy 0, policy_version 355452 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:31,466][626795] Updated weights for policy 0, policy_version 355462 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:33,362][626795] Updated weights for policy 0, policy_version 355472 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:33,975][24592] Fps is (10 sec: 41779.3, 60 sec: 40687.0, 300 sec: 40849.0). Total num frames: 2912043008. Throughput: 0: 10294.5. Samples: 477999714. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:33,976][24592] Avg episode reward: [(0, '4.671')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:35,442][626795] Updated weights for policy 0, policy_version 355482 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:37,220][626795] Updated weights for policy 0, policy_version 355492 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:38,975][24592] Fps is (10 sec: 42598.4, 60 sec: 41385.9, 300 sec: 40876.7). Total num frames: 2912264192. Throughput: 0: 10310.5. Samples: 478063140. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:38,976][24592] Avg episode reward: [(0, '4.372')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:39,216][626795] Updated weights for policy 0, policy_version 355502 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:41,220][626795] Updated weights for policy 0, policy_version 355512 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:43,134][626795] Updated weights for policy 0, policy_version 355522 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:43,976][24592] Fps is (10 sec: 42596.6, 60 sec: 41369.3, 300 sec: 40987.7). Total num frames: 2912468992. Throughput: 0: 10314.7. Samples: 478094400. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:43,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:45,949][626795] Updated weights for policy 0, policy_version 355532 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:47,971][626795] Updated weights for policy 0, policy_version 355542 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:48,976][24592] Fps is (10 sec: 37682.1, 60 sec: 40823.3, 300 sec: 40849.0). Total num frames: 2912641024. Throughput: 0: 10105.1. Samples: 478147926. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:48,978][24592] Avg episode reward: [(0, '4.650')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:49,990][626795] Updated weights for policy 0, policy_version 355552 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:51,952][626795] Updated weights for policy 0, policy_version 355562 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:53,957][626795] Updated weights for policy 0, policy_version 355572 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:53,975][24592] Fps is (10 sec: 37684.7, 60 sec: 40693.2, 300 sec: 40821.2). Total num frames: 2912845824. Throughput: 0: 10079.1. Samples: 478209612. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:53,977][24592] Avg episode reward: [(0, '4.670')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:55,924][626795] Updated weights for policy 0, policy_version 355582 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:57,732][626795] Updated weights for policy 0, policy_version 355592 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:58,976][24592] Fps is (10 sec: 40960.8, 60 sec: 40550.3, 300 sec: 40821.2). Total num frames: 2913050624. Throughput: 0: 10108.2. Samples: 478240800. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:02:58,979][24592] Avg episode reward: [(0, '4.560')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:02:59,810][626795] Updated weights for policy 0, policy_version 355602 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:01,715][626795] Updated weights for policy 0, policy_version 355612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:03,561][626795] Updated weights for policy 0, policy_version 355622 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:03,975][24592] Fps is (10 sec: 42598.5, 60 sec: 40823.5, 300 sec: 40876.7). Total num frames: 2913271808. Throughput: 0: 10278.8. Samples: 478304310. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:03,977][24592] Avg episode reward: [(0, '4.575')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000355624_2913271808.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:04,101][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000354426_2903457792.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:05,501][626795] Updated weights for policy 0, policy_version 355632 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:07,403][626795] Updated weights for policy 0, policy_version 355642 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:08,976][24592] Fps is (10 sec: 42597.8, 60 sec: 40686.8, 300 sec: 40821.1). Total num frames: 2913476608. Throughput: 0: 10297.8. Samples: 478368204. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:08,977][24592] Avg episode reward: [(0, '4.738')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:09,528][626795] Updated weights for policy 0, policy_version 355652 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:11,462][626795] Updated weights for policy 0, policy_version 355662 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:13,330][626795] Updated weights for policy 0, policy_version 355672 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:13,975][24592] Fps is (10 sec: 41779.1, 60 sec: 41369.6, 300 sec: 40876.7). Total num frames: 2913689600. Throughput: 0: 10285.2. Samples: 478399068. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:13,977][24592] Avg episode reward: [(0, '4.608')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:15,316][626795] Updated weights for policy 0, policy_version 355682 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:17,257][626795] Updated weights for policy 0, policy_version 355692 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:18,975][24592] Fps is (10 sec: 38503.2, 60 sec: 40686.9, 300 sec: 40848.9). Total num frames: 2913861632. Throughput: 0: 10272.7. Samples: 478461984. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:18,977][24592] Avg episode reward: [(0, '4.543')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:20,043][626795] Updated weights for policy 0, policy_version 355702 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:21,977][626795] Updated weights for policy 0, policy_version 355712 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:23,975][24592] Fps is (10 sec: 37683.4, 60 sec: 40687.0, 300 sec: 40821.2). Total num frames: 2914066432. Throughput: 0: 10060.3. Samples: 478515852. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:23,977][24592] Avg episode reward: [(0, '4.649')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:24,038][626795] Updated weights for policy 0, policy_version 355722 (0.0033)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:25,935][626795] Updated weights for policy 0, policy_version 355732 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:27,890][626795] Updated weights for policy 0, policy_version 355742 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:28,976][24592] Fps is (10 sec: 41778.1, 60 sec: 40686.7, 300 sec: 40848.9). Total num frames: 2914279424. Throughput: 0: 10057.0. Samples: 478546962. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:28,976][24592] Avg episode reward: [(0, '4.840')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:29,844][626795] Updated weights for policy 0, policy_version 355752 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:31,777][626795] Updated weights for policy 0, policy_version 355762 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:33,712][626795] Updated weights for policy 0, policy_version 355772 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:33,977][24592] Fps is (10 sec: 42590.1, 60 sec: 40822.2, 300 sec: 40848.7). Total num frames: 2914492416. Throughput: 0: 10277.2. Samples: 478610418. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:33,985][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:35,727][626795] Updated weights for policy 0, policy_version 355782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:37,497][626795] Updated weights for policy 0, policy_version 355792 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:38,975][24592] Fps is (10 sec: 42599.7, 60 sec: 40686.9, 300 sec: 40821.2). Total num frames: 2914705408. Throughput: 0: 10336.9. Samples: 478674774. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:38,976][24592] Avg episode reward: [(0, '5.048')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:39,489][626795] Updated weights for policy 0, policy_version 355802 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:41,405][626795] Updated weights for policy 0, policy_version 355812 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:43,429][626795] Updated weights for policy 0, policy_version 355822 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:43,975][24592] Fps is (10 sec: 41787.3, 60 sec: 40687.2, 300 sec: 40821.2). Total num frames: 2914910208. Throughput: 0: 10345.0. Samples: 478706322. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:43,976][24592] Avg episode reward: [(0, '4.860')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:45,357][626795] Updated weights for policy 0, policy_version 355832 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:47,266][626795] Updated weights for policy 0, policy_version 355842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:48,975][24592] Fps is (10 sec: 41779.5, 60 sec: 41369.8, 300 sec: 40960.0). Total num frames: 2915123200. Throughput: 0: 10322.5. Samples: 478768824. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:48,979][24592] Avg episode reward: [(0, '4.511')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:49,344][626795] Updated weights for policy 0, policy_version 355852 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:52,046][626795] Updated weights for policy 0, policy_version 355862 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:53,977][24592] Fps is (10 sec: 38498.0, 60 sec: 40822.7, 300 sec: 40821.0). Total num frames: 2915295232. Throughput: 0: 10105.0. Samples: 478822938. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:53,977][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:54,017][626795] Updated weights for policy 0, policy_version 355872 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:56,064][626795] Updated weights for policy 0, policy_version 355882 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:57,945][626795] Updated weights for policy 0, policy_version 355892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:58,975][24592] Fps is (10 sec: 38502.1, 60 sec: 40960.1, 300 sec: 40848.9). Total num frames: 2915508224. Throughput: 0: 10095.9. Samples: 478853382. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:03:58,976][24592] Avg episode reward: [(0, '4.623')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:03:59,941][626795] Updated weights for policy 0, policy_version 355902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:01,874][626795] Updated weights for policy 0, policy_version 355912 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:03,676][626795] Updated weights for policy 0, policy_version 355922 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:03,975][24592] Fps is (10 sec: 42603.4, 60 sec: 40823.5, 300 sec: 40876.7). Total num frames: 2915721216. Throughput: 0: 10122.9. Samples: 478917516. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:03,977][24592] Avg episode reward: [(0, '4.756')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:05,678][626795] Updated weights for policy 0, policy_version 355932 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:07,612][626795] Updated weights for policy 0, policy_version 355942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:08,975][24592] Fps is (10 sec: 42598.6, 60 sec: 40960.2, 300 sec: 40876.7). Total num frames: 2915934208. Throughput: 0: 10334.4. Samples: 478980900. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:08,977][24592] Avg episode reward: [(0, '4.601')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:09,473][626795] Updated weights for policy 0, policy_version 355952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:11,547][626795] Updated weights for policy 0, policy_version 355962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:13,554][626795] Updated weights for policy 0, policy_version 355972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:13,975][24592] Fps is (10 sec: 40960.0, 60 sec: 40687.0, 300 sec: 40821.2). Total num frames: 2916130816. Throughput: 0: 10344.1. Samples: 479012442. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:13,976][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:15,458][626795] Updated weights for policy 0, policy_version 355982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:17,544][626795] Updated weights for policy 0, policy_version 355992 (0.0018)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:18,975][24592] Fps is (10 sec: 40959.9, 60 sec: 41369.6, 300 sec: 40848.9). Total num frames: 2916343808. Throughput: 0: 10296.2. Samples: 479073726. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:18,977][24592] Avg episode reward: [(0, '4.380')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:19,523][626795] Updated weights for policy 0, policy_version 356002 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:21,467][626795] Updated weights for policy 0, policy_version 356012 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:23,385][626795] Updated weights for policy 0, policy_version 356022 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:24,769][24592] Fps is (10 sec: 39468.0, 60 sec: 40964.6, 300 sec: 40850.2). Total num frames: 2916556800. Throughput: 0: 10101.3. Samples: 479137344. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:24,769][24592] Avg episode reward: [(0, '4.459')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:26,243][626795] Updated weights for policy 0, policy_version 356032 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:28,171][626795] Updated weights for policy 0, policy_version 356042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:28,975][24592] Fps is (10 sec: 38502.4, 60 sec: 40823.7, 300 sec: 40849.0). Total num frames: 2916728832. Throughput: 0: 10047.9. Samples: 479158476. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:28,977][24592] Avg episode reward: [(0, '4.657')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:30,254][626795] Updated weights for policy 0, policy_version 356052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:32,149][626795] Updated weights for policy 0, policy_version 356062 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:33,976][24592] Fps is (10 sec: 40928.0, 60 sec: 40688.0, 300 sec: 40821.1). Total num frames: 2916933632. Throughput: 0: 10049.1. Samples: 479221038. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:33,976][24592] Avg episode reward: [(0, '4.722')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:34,142][626795] Updated weights for policy 0, policy_version 356072 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:35,994][626795] Updated weights for policy 0, policy_version 356082 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:37,934][626795] Updated weights for policy 0, policy_version 356092 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:38,976][24592] Fps is (10 sec: 41777.7, 60 sec: 40686.7, 300 sec: 40821.1). Total num frames: 2917146624. Throughput: 0: 10263.9. Samples: 479284806. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:38,978][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:39,791][626795] Updated weights for policy 0, policy_version 356102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:41,778][626795] Updated weights for policy 0, policy_version 356112 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:43,721][626795] Updated weights for policy 0, policy_version 356122 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:43,975][24592] Fps is (10 sec: 42599.9, 60 sec: 40823.5, 300 sec: 40821.2). Total num frames: 2917359616. Throughput: 0: 10295.2. Samples: 479316666. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:43,976][24592] Avg episode reward: [(0, '4.761')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:45,633][626795] Updated weights for policy 0, policy_version 356132 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:47,599][626795] Updated weights for policy 0, policy_version 356142 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:48,975][24592] Fps is (10 sec: 41780.6, 60 sec: 40686.9, 300 sec: 40821.2). Total num frames: 2917564416. Throughput: 0: 10267.6. Samples: 479379558. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:48,976][24592] Avg episode reward: [(0, '5.000')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:49,814][626795] Updated weights for policy 0, policy_version 356152 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:51,732][626795] Updated weights for policy 0, policy_version 356162 (0.0025)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:53,976][24592] Fps is (10 sec: 39319.3, 60 sec: 40960.4, 300 sec: 40737.8). Total num frames: 2917752832. Throughput: 0: 10149.7. Samples: 479437644. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:53,977][24592] Avg episode reward: [(0, '5.009')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:54,170][626795] Updated weights for policy 0, policy_version 356172 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:56,512][626795] Updated weights for policy 0, policy_version 356182 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:58,981][24592] Fps is (10 sec: 32751.1, 60 sec: 39727.8, 300 sec: 40626.1). Total num frames: 2917892096. Throughput: 0: 9998.3. Samples: 479462418. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:04:58,992][24592] Avg episode reward: [(0, '4.441')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:04:59,984][626795] Updated weights for policy 0, policy_version 356192 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:02,073][626795] Updated weights for policy 0, policy_version 356202 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:03,975][24592] Fps is (10 sec: 31950.6, 60 sec: 39185.1, 300 sec: 40515.8). Total num frames: 2918072320. Throughput: 0: 9646.9. Samples: 479507838. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:03,977][24592] Avg episode reward: [(0, '4.668')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:03,987][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000356210_2918072320.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:04,168][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000355028_2908389376.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:04,399][626795] Updated weights for policy 0, policy_version 356212 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:06,335][626795] Updated weights for policy 0, policy_version 356222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:08,616][626795] Updated weights for policy 0, policy_version 356232 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:08,984][24592] Fps is (10 sec: 37673.4, 60 sec: 38906.9, 300 sec: 40486.9). Total num frames: 2918268928. Throughput: 0: 9658.6. Samples: 479564394. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:08,987][24592] Avg episode reward: [(0, '4.770')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:10,933][626795] Updated weights for policy 0, policy_version 356242 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:13,059][626795] Updated weights for policy 0, policy_version 356252 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:13,975][24592] Fps is (10 sec: 36863.7, 60 sec: 38502.3, 300 sec: 40349.1). Total num frames: 2918440960. Throughput: 0: 9617.3. Samples: 479591256. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:13,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:15,301][626795] Updated weights for policy 0, policy_version 356262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:17,379][626795] Updated weights for policy 0, policy_version 356272 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:18,975][24592] Fps is (10 sec: 36892.6, 60 sec: 38229.3, 300 sec: 40293.6). Total num frames: 2918637568. Throughput: 0: 9509.8. Samples: 479648976. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:18,979][24592] Avg episode reward: [(0, '4.664')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:19,487][626795] Updated weights for policy 0, policy_version 356282 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:21,357][626795] Updated weights for policy 0, policy_version 356292 (0.0023)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:23,973][626795] Updated weights for policy 0, policy_version 356302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:23,975][24592] Fps is (10 sec: 38502.5, 60 sec: 38326.3, 300 sec: 40210.5). Total num frames: 2918825984. Throughput: 0: 9338.5. Samples: 479705034. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:23,985][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:26,318][626795] Updated weights for policy 0, policy_version 356312 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:28,324][626795] Updated weights for policy 0, policy_version 356322 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:28,975][24592] Fps is (10 sec: 37683.5, 60 sec: 38092.8, 300 sec: 40265.8). Total num frames: 2919014400. Throughput: 0: 9200.5. Samples: 479730690. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:28,977][24592] Avg episode reward: [(0, '4.542')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:31,385][626795] Updated weights for policy 0, policy_version 356332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:33,425][626795] Updated weights for policy 0, policy_version 356342 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:33,976][24592] Fps is (10 sec: 35225.6, 60 sec: 37410.3, 300 sec: 40126.9). Total num frames: 2919178240. Throughput: 0: 8924.5. Samples: 479781162. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:33,977][24592] Avg episode reward: [(0, '4.613')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:35,362][626795] Updated weights for policy 0, policy_version 356352 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:37,426][626795] Updated weights for policy 0, policy_version 356362 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:38,980][24592] Fps is (10 sec: 36848.3, 60 sec: 37271.2, 300 sec: 40098.6). Total num frames: 2919383040. Throughput: 0: 9004.9. Samples: 479842896. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:38,981][24592] Avg episode reward: [(0, '4.468')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:39,417][626795] Updated weights for policy 0, policy_version 356372 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:41,303][626795] Updated weights for policy 0, policy_version 356382 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:43,166][626795] Updated weights for policy 0, policy_version 356392 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:43,975][24592] Fps is (10 sec: 41779.5, 60 sec: 37273.6, 300 sec: 40099.2). Total num frames: 2919596032. Throughput: 0: 9166.5. Samples: 479874864. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:43,977][24592] Avg episode reward: [(0, '4.643')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:45,009][626795] Updated weights for policy 0, policy_version 356402 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:47,107][626795] Updated weights for policy 0, policy_version 356412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:48,975][24592] Fps is (10 sec: 41797.1, 60 sec: 37273.6, 300 sec: 40099.2). Total num frames: 2919800832. Throughput: 0: 9558.3. Samples: 479937960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:48,977][24592] Avg episode reward: [(0, '4.807')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:49,147][626795] Updated weights for policy 0, policy_version 356422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:51,114][626795] Updated weights for policy 0, policy_version 356432 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:53,055][626795] Updated weights for policy 0, policy_version 356442 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:53,976][24592] Fps is (10 sec: 40139.9, 60 sec: 37410.3, 300 sec: 40071.3). Total num frames: 2919997440. Throughput: 0: 9651.4. Samples: 479998632. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:53,978][24592] Avg episode reward: [(0, '4.458')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:55,338][626795] Updated weights for policy 0, policy_version 356452 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:57,608][626795] Updated weights for policy 0, policy_version 356462 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:58,975][24592] Fps is (10 sec: 38502.3, 60 sec: 38232.7, 300 sec: 39988.1). Total num frames: 2920185856. Throughput: 0: 9651.9. Samples: 480025590. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:05:58,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:05:59,624][626795] Updated weights for policy 0, policy_version 356472 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:01,871][626795] Updated weights for policy 0, policy_version 356482 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:04,323][24592] Fps is (10 sec: 35626.0, 60 sec: 38009.1, 300 sec: 39968.7). Total num frames: 2920366080. Throughput: 0: 9596.9. Samples: 480084174. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:04,324][24592] Avg episode reward: [(0, '4.644')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:04,951][626795] Updated weights for policy 0, policy_version 356492 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:06,953][626795] Updated weights for policy 0, policy_version 356502 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:08,975][24592] Fps is (10 sec: 35225.3, 60 sec: 37824.6, 300 sec: 39904.8). Total num frames: 2920538112. Throughput: 0: 9497.3. Samples: 480132414. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:08,977][24592] Avg episode reward: [(0, '4.513')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:09,194][626795] Updated weights for policy 0, policy_version 356512 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:11,088][626795] Updated weights for policy 0, policy_version 356522 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:13,040][626795] Updated weights for policy 0, policy_version 356532 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:13,975][24592] Fps is (10 sec: 39889.3, 60 sec: 38502.4, 300 sec: 39904.9). Total num frames: 2920751104. Throughput: 0: 9613.6. Samples: 480163302. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:13,977][24592] Avg episode reward: [(0, '4.652')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:14,986][626795] Updated weights for policy 0, policy_version 356542 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:17,004][626795] Updated weights for policy 0, policy_version 356552 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:18,976][24592] Fps is (10 sec: 40957.0, 60 sec: 38501.9, 300 sec: 39849.2). Total num frames: 2920947712. Throughput: 0: 9873.2. Samples: 480225462. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:18,977][24592] Avg episode reward: [(0, '4.547')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:19,177][626795] Updated weights for policy 0, policy_version 356562 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:21,172][626795] Updated weights for policy 0, policy_version 356572 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:22,913][626795] Updated weights for policy 0, policy_version 356582 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:23,976][24592] Fps is (10 sec: 40138.4, 60 sec: 38775.1, 300 sec: 39821.4). Total num frames: 2921152512. Throughput: 0: 9864.2. Samples: 480286752. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:23,978][24592] Avg episode reward: [(0, '4.491')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:25,071][626795] Updated weights for policy 0, policy_version 356592 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:27,438][626795] Updated weights for policy 0, policy_version 356602 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:28,984][24592] Fps is (10 sec: 38474.3, 60 sec: 38633.7, 300 sec: 39764.8). Total num frames: 2921332736. Throughput: 0: 9786.2. Samples: 480315324. Policy #0 lag: (min: 0.0, avg: 2.2, max: 6.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:28,985][24592] Avg episode reward: [(0, '4.523')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:29,941][626795] Updated weights for policy 0, policy_version 356612 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:31,976][626795] Updated weights for policy 0, policy_version 356622 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:33,926][626795] Updated weights for policy 0, policy_version 356632 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:33,975][24592] Fps is (10 sec: 37685.7, 60 sec: 39185.1, 300 sec: 39823.6). Total num frames: 2921529344. Throughput: 0: 9595.3. Samples: 480369750. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:33,976][24592] Avg episode reward: [(0, '4.607')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:35,976][626795] Updated weights for policy 0, policy_version 356642 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:38,975][24592] Fps is (10 sec: 35254.1, 60 sec: 38368.6, 300 sec: 39654.8). Total num frames: 2921684992. Throughput: 0: 9342.3. Samples: 480419034. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:38,976][24592] Avg episode reward: [(0, '4.663')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:39,226][626795] Updated weights for policy 0, policy_version 356652 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:41,448][626795] Updated weights for policy 0, policy_version 356662 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:43,467][626795] Updated weights for policy 0, policy_version 356672 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:43,975][24592] Fps is (10 sec: 33587.2, 60 sec: 37819.7, 300 sec: 39571.5). Total num frames: 2921865216. Throughput: 0: 9376.0. Samples: 480447510. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:43,976][24592] Avg episode reward: [(0, '4.688')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:45,773][626795] Updated weights for policy 0, policy_version 356682 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:47,938][626795] Updated weights for policy 0, policy_version 356692 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:48,975][24592] Fps is (10 sec: 37682.6, 60 sec: 37683.1, 300 sec: 39517.2). Total num frames: 2922061824. Throughput: 0: 9382.5. Samples: 480503124. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:48,977][24592] Avg episode reward: [(0, '4.584')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:50,002][626795] Updated weights for policy 0, policy_version 356702 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:52,178][626795] Updated weights for policy 0, policy_version 356712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:53,975][24592] Fps is (10 sec: 38502.4, 60 sec: 37546.8, 300 sec: 39432.7). Total num frames: 2922250240. Throughput: 0: 9520.4. Samples: 480560832. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:53,978][24592] Avg episode reward: [(0, '4.616')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:54,337][626795] Updated weights for policy 0, policy_version 356722 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:56,249][626795] Updated weights for policy 0, policy_version 356732 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:06:58,168][626795] Updated weights for policy 0, policy_version 356742 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:58,976][24592] Fps is (10 sec: 39319.1, 60 sec: 37819.2, 300 sec: 39432.6). Total num frames: 2922455040. Throughput: 0: 9528.6. Samples: 480592098. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:06:58,978][24592] Avg episode reward: [(0, '4.717')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:00,192][626795] Updated weights for policy 0, policy_version 356752 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:02,182][626795] Updated weights for policy 0, policy_version 356762 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:03,975][24592] Fps is (10 sec: 41779.2, 60 sec: 38589.5, 300 sec: 39432.7). Total num frames: 2922668032. Throughput: 0: 9525.2. Samples: 480654090. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:03,977][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000356771_2922668032.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:04,114][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000355624_2913271808.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:04,169][626795] Updated weights for policy 0, policy_version 356772 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:06,174][626795] Updated weights for policy 0, policy_version 356782 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:08,214][626795] Updated weights for policy 0, policy_version 356792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:08,976][24592] Fps is (10 sec: 40961.2, 60 sec: 38775.2, 300 sec: 39515.9). Total num frames: 2922864640. Throughput: 0: 9534.8. Samples: 480715818. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:08,977][24592] Avg episode reward: [(0, '4.866')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:11,076][626795] Updated weights for policy 0, policy_version 356802 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:13,075][626795] Updated weights for policy 0, policy_version 356812 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:13,975][24592] Fps is (10 sec: 36864.0, 60 sec: 38092.8, 300 sec: 39377.1). Total num frames: 2923036672. Throughput: 0: 9366.1. Samples: 480736722. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:14,006][24592] Avg episode reward: [(0, '4.758')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:15,176][626795] Updated weights for policy 0, policy_version 356822 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:17,151][626795] Updated weights for policy 0, policy_version 356832 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:18,975][24592] Fps is (10 sec: 36865.9, 60 sec: 38093.3, 300 sec: 39349.4). Total num frames: 2923233280. Throughput: 0: 9510.0. Samples: 480797700. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:18,976][24592] Avg episode reward: [(0, '4.690')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:19,143][626795] Updated weights for policy 0, policy_version 356842 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:21,093][626795] Updated weights for policy 0, policy_version 356852 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:23,059][626795] Updated weights for policy 0, policy_version 356862 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:23,976][24592] Fps is (10 sec: 40958.0, 60 sec: 38229.4, 300 sec: 39349.3). Total num frames: 2923446272. Throughput: 0: 9803.9. Samples: 480860214. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:23,977][24592] Avg episode reward: [(0, '4.817')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:24,911][626795] Updated weights for policy 0, policy_version 356872 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:26,925][626795] Updated weights for policy 0, policy_version 356882 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:27,635][626772] Signal inference workers to stop experience collection... (6150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:27,642][626772] Signal inference workers to resume experience collection... (6150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:27,653][626795] InferenceWorker_p0-w0: stopping experience collection (6150 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:27,657][626795] InferenceWorker_p0-w0: resuming experience collection (6150 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:28,902][626795] Updated weights for policy 0, policy_version 356892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:28,975][24592] Fps is (10 sec: 42598.2, 60 sec: 38780.7, 300 sec: 39377.1). Total num frames: 2923659264. Throughput: 0: 9874.3. Samples: 480891852. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:28,976][24592] Avg episode reward: [(0, '4.672')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:30,834][626795] Updated weights for policy 0, policy_version 356902 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:32,887][626795] Updated weights for policy 0, policy_version 356912 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:33,975][24592] Fps is (10 sec: 41780.9, 60 sec: 38911.9, 300 sec: 39321.6). Total num frames: 2923864064. Throughput: 0: 10027.7. Samples: 480954372. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:33,977][24592] Avg episode reward: [(0, '4.729')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:34,897][626795] Updated weights for policy 0, policy_version 356922 (0.0026)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:36,969][626795] Updated weights for policy 0, policy_version 356932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:38,862][626795] Updated weights for policy 0, policy_version 356942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:38,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39731.2, 300 sec: 39321.7). Total num frames: 2924068864. Throughput: 0: 10109.5. Samples: 481015758. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:38,977][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:40,984][626795] Updated weights for policy 0, policy_version 356952 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:42,864][626795] Updated weights for policy 0, policy_version 356962 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:43,976][24592] Fps is (10 sec: 36862.7, 60 sec: 39457.9, 300 sec: 39293.8). Total num frames: 2924232704. Throughput: 0: 10084.2. Samples: 481045884. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:43,978][24592] Avg episode reward: [(0, '4.772')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:45,908][626795] Updated weights for policy 0, policy_version 356972 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:48,078][626795] Updated weights for policy 0, policy_version 356982 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:48,975][24592] Fps is (10 sec: 35225.5, 60 sec: 39321.7, 300 sec: 39238.3). Total num frames: 2924421120. Throughput: 0: 9813.2. Samples: 481095684. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:48,976][24592] Avg episode reward: [(0, '4.813')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:50,329][626795] Updated weights for policy 0, policy_version 356992 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:52,534][626795] Updated weights for policy 0, policy_version 357002 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:53,977][24592] Fps is (10 sec: 36861.1, 60 sec: 39184.3, 300 sec: 39154.8). Total num frames: 2924601344. Throughput: 0: 9633.0. Samples: 481149312. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:53,978][24592] Avg episode reward: [(0, '4.461')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:55,350][626795] Updated weights for policy 0, policy_version 357012 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:57,598][626795] Updated weights for policy 0, policy_version 357022 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:58,976][24592] Fps is (10 sec: 35224.7, 60 sec: 38639.3, 300 sec: 38988.3). Total num frames: 2924773376. Throughput: 0: 9687.1. Samples: 481172646. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:07:58,977][24592] Avg episode reward: [(0, '4.381')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:07:59,884][626795] Updated weights for policy 0, policy_version 357032 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:02,177][626795] Updated weights for policy 0, policy_version 357042 (0.0023)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:03,975][24592] Fps is (10 sec: 34410.5, 60 sec: 37956.3, 300 sec: 38877.3). Total num frames: 2924945408. Throughput: 0: 9514.4. Samples: 481225848. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:03,977][24592] Avg episode reward: [(0, '4.626')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:04,622][626795] Updated weights for policy 0, policy_version 357052 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:06,632][626795] Updated weights for policy 0, policy_version 357062 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:08,943][626795] Updated weights for policy 0, policy_version 357072 (0.0031)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:08,975][24592] Fps is (10 sec: 36045.8, 60 sec: 37820.0, 300 sec: 38794.0). Total num frames: 2925133824. Throughput: 0: 9361.7. Samples: 481281486. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:08,976][24592] Avg episode reward: [(0, '4.796')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:10,974][626795] Updated weights for policy 0, policy_version 357082 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:12,965][626795] Updated weights for policy 0, policy_version 357092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:13,976][24592] Fps is (10 sec: 38501.7, 60 sec: 38229.2, 300 sec: 38877.3). Total num frames: 2925330432. Throughput: 0: 9316.4. Samples: 481311090. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:13,976][24592] Avg episode reward: [(0, '4.744')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:15,193][626795] Updated weights for policy 0, policy_version 357102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:18,224][626795] Updated weights for policy 0, policy_version 357112 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:18,975][24592] Fps is (10 sec: 35225.6, 60 sec: 37546.6, 300 sec: 38710.7). Total num frames: 2925486080. Throughput: 0: 9022.7. Samples: 481360392. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:18,976][24592] Avg episode reward: [(0, '4.692')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:20,307][626795] Updated weights for policy 0, policy_version 357122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:22,412][626795] Updated weights for policy 0, policy_version 357132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:23,975][24592] Fps is (10 sec: 35226.0, 60 sec: 37273.9, 300 sec: 38655.2). Total num frames: 2925682688. Throughput: 0: 8981.2. Samples: 481419912. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:23,977][24592] Avg episode reward: [(0, '4.661')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:24,521][626795] Updated weights for policy 0, policy_version 357142 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:26,642][626795] Updated weights for policy 0, policy_version 357152 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:28,662][626795] Updated weights for policy 0, policy_version 357162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:28,976][24592] Fps is (10 sec: 39320.9, 60 sec: 37000.4, 300 sec: 38599.8). Total num frames: 2925879296. Throughput: 0: 8931.4. Samples: 481447794. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:28,976][24592] Avg episode reward: [(0, '4.592')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:30,763][626795] Updated weights for policy 0, policy_version 357172 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:32,719][626795] Updated weights for policy 0, policy_version 357182 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:33,975][24592] Fps is (10 sec: 40140.8, 60 sec: 37000.5, 300 sec: 38571.8). Total num frames: 2926084096. Throughput: 0: 9169.9. Samples: 481508328. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:33,976][24592] Avg episode reward: [(0, '4.478')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:34,770][626795] Updated weights for policy 0, policy_version 357192 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:36,691][626795] Updated weights for policy 0, policy_version 357202 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:38,710][626795] Updated weights for policy 0, policy_version 357212 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:38,976][24592] Fps is (10 sec: 40139.8, 60 sec: 36863.7, 300 sec: 38544.0). Total num frames: 2926280704. Throughput: 0: 9345.6. Samples: 481569858. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:38,980][24592] Avg episode reward: [(0, '4.596')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:40,924][626795] Updated weights for policy 0, policy_version 357222 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:43,032][626795] Updated weights for policy 0, policy_version 357232 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:43,975][24592] Fps is (10 sec: 39321.7, 60 sec: 37410.4, 300 sec: 38488.5). Total num frames: 2926477312. Throughput: 0: 9478.6. Samples: 481599180. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:43,978][24592] Avg episode reward: [(0, '4.590')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:45,467][626795] Updated weights for policy 0, policy_version 357242 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:48,022][626795] Updated weights for policy 0, policy_version 357252 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:48,975][24592] Fps is (10 sec: 35227.2, 60 sec: 36864.0, 300 sec: 38433.1). Total num frames: 2926632960. Throughput: 0: 9431.5. Samples: 481650264. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:48,976][24592] Avg episode reward: [(0, '4.831')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:51,214][626795] Updated weights for policy 0, policy_version 357262 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:53,417][626795] Updated weights for policy 0, policy_version 357272 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:53,975][24592] Fps is (10 sec: 31129.5, 60 sec: 36455.1, 300 sec: 38238.6). Total num frames: 2926788608. Throughput: 0: 9210.5. Samples: 481695960. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:53,977][24592] Avg episode reward: [(0, '4.651')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:55,497][626795] Updated weights for policy 0, policy_version 357282 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:57,609][626795] Updated weights for policy 0, policy_version 357292 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:58,975][24592] Fps is (10 sec: 35225.6, 60 sec: 36864.2, 300 sec: 38183.1). Total num frames: 2926985216. Throughput: 0: 9216.0. Samples: 481725810. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:08:58,976][24592] Avg episode reward: [(0, '4.570')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:08:59,637][626795] Updated weights for policy 0, policy_version 357302 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:01,592][626795] Updated weights for policy 0, policy_version 357312 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:03,561][626795] Updated weights for policy 0, policy_version 357322 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:03,975][24592] Fps is (10 sec: 40141.1, 60 sec: 37410.1, 300 sec: 38155.3). Total num frames: 2927190016. Throughput: 0: 9468.7. Samples: 481786482. Policy #0 lag: (min: 0.0, avg: 2.2, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:03,976][24592] Avg episode reward: [(0, '4.535')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:04,079][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000357324_2927198208.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:04,146][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000356210_2918072320.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:05,866][626795] Updated weights for policy 0, policy_version 357332 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:07,766][626795] Updated weights for policy 0, policy_version 357342 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:08,975][24592] Fps is (10 sec: 40959.8, 60 sec: 37683.2, 300 sec: 38183.0). Total num frames: 2927394816. Throughput: 0: 9471.3. Samples: 481846122. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:08,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:09,788][626795] Updated weights for policy 0, policy_version 357352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:11,790][626795] Updated weights for policy 0, policy_version 357362 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:13,763][626795] Updated weights for policy 0, policy_version 357372 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:13,975][24592] Fps is (10 sec: 40959.8, 60 sec: 37819.8, 300 sec: 38155.3). Total num frames: 2927599616. Throughput: 0: 9538.4. Samples: 481877022. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:13,977][24592] Avg episode reward: [(0, '4.660')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:15,790][626795] Updated weights for policy 0, policy_version 357382 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:17,804][626795] Updated weights for policy 0, policy_version 357392 (0.0021)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:18,976][24592] Fps is (10 sec: 40140.1, 60 sec: 38502.3, 300 sec: 38202.4). Total num frames: 2927796224. Throughput: 0: 9549.7. Samples: 481938066. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:18,977][24592] Avg episode reward: [(0, '4.538')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:20,388][626795] Updated weights for policy 0, policy_version 357402 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:23,684][626795] Updated weights for policy 0, policy_version 357412 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:23,975][24592] Fps is (10 sec: 32767.8, 60 sec: 37410.1, 300 sec: 37960.9). Total num frames: 2927927296. Throughput: 0: 9135.0. Samples: 481980930. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:23,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:25,893][626795] Updated weights for policy 0, policy_version 357422 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:28,039][626795] Updated weights for policy 0, policy_version 357432 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:28,976][24592] Fps is (10 sec: 31946.2, 60 sec: 37273.1, 300 sec: 37905.3). Total num frames: 2928115712. Throughput: 0: 9069.5. Samples: 482007318. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:28,978][24592] Avg episode reward: [(0, '4.524')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:30,361][626795] Updated weights for policy 0, policy_version 357442 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:32,248][626795] Updated weights for policy 0, policy_version 357452 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:33,976][24592] Fps is (10 sec: 38501.4, 60 sec: 37136.9, 300 sec: 37849.8). Total num frames: 2928312320. Throughput: 0: 9246.2. Samples: 482066346. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:33,978][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:34,257][626795] Updated weights for policy 0, policy_version 357462 (0.0049)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:36,281][626795] Updated weights for policy 0, policy_version 357472 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:38,307][626795] Updated weights for policy 0, policy_version 357482 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:38,975][24592] Fps is (10 sec: 40144.9, 60 sec: 37273.9, 300 sec: 37822.0). Total num frames: 2928517120. Throughput: 0: 9599.4. Samples: 482127930. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:38,977][24592] Avg episode reward: [(0, '4.576')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:40,283][626795] Updated weights for policy 0, policy_version 357492 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:42,366][626795] Updated weights for policy 0, policy_version 357502 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:43,975][24592] Fps is (10 sec: 40142.0, 60 sec: 37273.6, 300 sec: 37794.3). Total num frames: 2928713728. Throughput: 0: 9588.5. Samples: 482157294. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:43,976][24592] Avg episode reward: [(0, '4.642')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:44,591][626795] Updated weights for policy 0, policy_version 357512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:46,885][626795] Updated weights for policy 0, policy_version 357522 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:48,876][626795] Updated weights for policy 0, policy_version 357532 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:48,975][24592] Fps is (10 sec: 38502.3, 60 sec: 37819.7, 300 sec: 37794.3). Total num frames: 2928902144. Throughput: 0: 9497.5. Samples: 482213868. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:48,976][24592] Avg episode reward: [(0, '4.789')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:50,898][626795] Updated weights for policy 0, policy_version 357542 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:52,949][626795] Updated weights for policy 0, policy_version 357552 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:53,976][24592] Fps is (10 sec: 38498.5, 60 sec: 38501.8, 300 sec: 37989.2). Total num frames: 2929098752. Throughput: 0: 9509.9. Samples: 482274078. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:53,979][24592] Avg episode reward: [(0, '4.474')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:55,221][626795] Updated weights for policy 0, policy_version 357562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:09:58,709][626795] Updated weights for policy 0, policy_version 357572 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:58,975][24592] Fps is (10 sec: 32768.0, 60 sec: 37410.1, 300 sec: 37822.0). Total num frames: 2929229824. Throughput: 0: 9209.7. Samples: 482291460. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:09:58,976][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:01,095][626795] Updated weights for policy 0, policy_version 357582 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:03,193][626795] Updated weights for policy 0, policy_version 357592 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:03,975][24592] Fps is (10 sec: 31952.0, 60 sec: 37137.0, 300 sec: 37795.3). Total num frames: 2929418240. Throughput: 0: 9010.6. Samples: 482343540. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:03,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:05,694][626795] Updated weights for policy 0, policy_version 357602 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:08,030][626795] Updated weights for policy 0, policy_version 357612 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:08,975][24592] Fps is (10 sec: 35225.5, 60 sec: 36454.4, 300 sec: 37766.5). Total num frames: 2929582080. Throughput: 0: 9213.6. Samples: 482395542. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:08,976][24592] Avg episode reward: [(0, '4.586')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:10,466][626795] Updated weights for policy 0, policy_version 357622 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:12,634][626795] Updated weights for policy 0, policy_version 357632 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:13,975][24592] Fps is (10 sec: 35225.3, 60 sec: 36181.3, 300 sec: 37738.7). Total num frames: 2929770496. Throughput: 0: 9199.5. Samples: 482421288. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:13,977][24592] Avg episode reward: [(0, '4.604')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:14,755][626795] Updated weights for policy 0, policy_version 357642 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:16,955][626795] Updated weights for policy 0, policy_version 357652 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:18,976][24592] Fps is (10 sec: 38501.9, 60 sec: 36181.3, 300 sec: 37766.5). Total num frames: 2929967104. Throughput: 0: 9179.6. Samples: 482479428. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:18,979][24592] Avg episode reward: [(0, '4.551')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:18,979][626795] Updated weights for policy 0, policy_version 357662 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:21,252][626795] Updated weights for policy 0, policy_version 357672 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:23,264][626795] Updated weights for policy 0, policy_version 357682 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:23,975][24592] Fps is (10 sec: 37683.7, 60 sec: 37000.6, 300 sec: 37738.7). Total num frames: 2930147328. Throughput: 0: 9074.3. Samples: 482536272. Policy #0 lag: (min: 0.0, avg: 2.7, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:23,976][24592] Avg episode reward: [(0, '4.700')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:25,590][626795] Updated weights for policy 0, policy_version 357692 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:27,585][626795] Updated weights for policy 0, policy_version 357702 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:29,677][24592] Fps is (10 sec: 34447.4, 60 sec: 36573.4, 300 sec: 37732.3). Total num frames: 2930335744. Throughput: 0: 8911.8. Samples: 482564580. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:29,680][24592] Avg episode reward: [(0, '4.662')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:30,646][626795] Updated weights for policy 0, policy_version 357712 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:32,525][626795] Updated weights for policy 0, policy_version 357722 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:33,976][24592] Fps is (10 sec: 36863.3, 60 sec: 36727.6, 300 sec: 37739.3). Total num frames: 2930515968. Throughput: 0: 8948.5. Samples: 482616552. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:33,976][24592] Avg episode reward: [(0, '4.794')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:34,620][626795] Updated weights for policy 0, policy_version 357732 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:36,540][626795] Updated weights for policy 0, policy_version 357742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:38,597][626795] Updated weights for policy 0, policy_version 357752 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:38,976][24592] Fps is (10 sec: 41407.6, 60 sec: 36727.4, 300 sec: 37711.0). Total num frames: 2930720768. Throughput: 0: 8963.5. Samples: 482677428. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:38,977][24592] Avg episode reward: [(0, '4.645')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:40,618][626795] Updated weights for policy 0, policy_version 357762 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:42,537][626795] Updated weights for policy 0, policy_version 357772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:43,982][24592] Fps is (10 sec: 40932.5, 60 sec: 36859.8, 300 sec: 37710.1). Total num frames: 2930925568. Throughput: 0: 9258.6. Samples: 482708160. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:43,983][24592] Avg episode reward: [(0, '4.609')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:44,524][626795] Updated weights for policy 0, policy_version 357782 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:46,479][626795] Updated weights for policy 0, policy_version 357792 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:48,439][626795] Updated weights for policy 0, policy_version 357802 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:48,977][24592] Fps is (10 sec: 41774.2, 60 sec: 37272.8, 300 sec: 37766.4). Total num frames: 2931138560. Throughput: 0: 9513.3. Samples: 482771652. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:48,979][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:50,424][626795] Updated weights for policy 0, policy_version 357812 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:52,746][626795] Updated weights for policy 0, policy_version 357822 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:53,976][24592] Fps is (10 sec: 40167.0, 60 sec: 37137.5, 300 sec: 37766.5). Total num frames: 2931326976. Throughput: 0: 9665.5. Samples: 482830494. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:53,978][24592] Avg episode reward: [(0, '4.988')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:54,710][626795] Updated weights for policy 0, policy_version 357832 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:56,691][626795] Updated weights for policy 0, policy_version 357842 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:10:58,617][626795] Updated weights for policy 0, policy_version 357852 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:58,975][24592] Fps is (10 sec: 39326.8, 60 sec: 38365.9, 300 sec: 37894.5). Total num frames: 2931531776. Throughput: 0: 9769.8. Samples: 482860926. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:10:58,978][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:00,708][626795] Updated weights for policy 0, policy_version 357862 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:03,627][626795] Updated weights for policy 0, policy_version 357872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:03,975][24592] Fps is (10 sec: 36865.0, 60 sec: 37956.2, 300 sec: 37822.0). Total num frames: 2931695616. Throughput: 0: 9669.2. Samples: 482914542. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:03,976][24592] Avg episode reward: [(0, '4.811')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000357873_2931695616.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:04,131][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000356771_2922668032.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:05,775][626795] Updated weights for policy 0, policy_version 357882 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:07,765][626795] Updated weights for policy 0, policy_version 357892 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:08,976][24592] Fps is (10 sec: 36861.4, 60 sec: 38638.5, 300 sec: 37794.2). Total num frames: 2931900416. Throughput: 0: 9696.8. Samples: 482972634. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:08,978][24592] Avg episode reward: [(0, '4.683')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:09,684][626795] Updated weights for policy 0, policy_version 357902 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:11,634][626795] Updated weights for policy 0, policy_version 357912 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:13,814][626795] Updated weights for policy 0, policy_version 357922 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 38912.0, 300 sec: 37822.1). Total num frames: 2932105216. Throughput: 0: 9932.7. Samples: 483004584. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:13,977][24592] Avg episode reward: [(0, '4.490')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:15,906][626795] Updated weights for policy 0, policy_version 357932 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:17,814][626795] Updated weights for policy 0, policy_version 357942 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:18,975][24592] Fps is (10 sec: 40963.0, 60 sec: 39048.7, 300 sec: 37822.1). Total num frames: 2932310016. Throughput: 0: 9948.2. Samples: 483064218. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:18,976][24592] Avg episode reward: [(0, '4.637')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:19,849][626795] Updated weights for policy 0, policy_version 357952 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:21,758][626795] Updated weights for policy 0, policy_version 357962 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:23,886][626795] Updated weights for policy 0, policy_version 357972 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:23,975][24592] Fps is (10 sec: 40141.2, 60 sec: 39321.6, 300 sec: 37878.6). Total num frames: 2932506624. Throughput: 0: 9968.0. Samples: 483125988. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:23,977][24592] Avg episode reward: [(0, '4.771')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:25,967][626795] Updated weights for policy 0, policy_version 357982 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:28,013][626795] Updated weights for policy 0, policy_version 357992 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:28,975][24592] Fps is (10 sec: 39321.5, 60 sec: 39925.1, 300 sec: 37877.6). Total num frames: 2932703232. Throughput: 0: 9913.9. Samples: 483154218. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:28,976][24592] Avg episode reward: [(0, '4.504')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:30,109][626795] Updated weights for policy 0, policy_version 358002 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:32,154][626795] Updated weights for policy 0, policy_version 358012 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:33,976][24592] Fps is (10 sec: 39321.2, 60 sec: 39731.2, 300 sec: 38016.4). Total num frames: 2932899840. Throughput: 0: 9812.4. Samples: 483213198. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:33,977][24592] Avg episode reward: [(0, '4.921')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:34,333][626795] Updated weights for policy 0, policy_version 358022 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:37,216][626795] Updated weights for policy 0, policy_version 358032 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:38,976][24592] Fps is (10 sec: 36044.0, 60 sec: 39048.5, 300 sec: 37960.9). Total num frames: 2933063680. Throughput: 0: 9632.6. Samples: 483263958. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:38,978][24592] Avg episode reward: [(0, '4.479')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:39,385][626795] Updated weights for policy 0, policy_version 358042 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:41,440][626795] Updated weights for policy 0, policy_version 358052 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:43,239][626795] Updated weights for policy 0, policy_version 358062 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:43,975][24592] Fps is (10 sec: 36864.7, 60 sec: 39053.1, 300 sec: 37988.7). Total num frames: 2933268480. Throughput: 0: 9643.3. Samples: 483294876. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:43,976][24592] Avg episode reward: [(0, '4.903')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:45,241][626795] Updated weights for policy 0, policy_version 358072 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:47,121][626795] Updated weights for policy 0, policy_version 358082 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:48,975][24592] Fps is (10 sec: 40961.0, 60 sec: 38912.9, 300 sec: 38044.2). Total num frames: 2933473280. Throughput: 0: 9840.3. Samples: 483357354. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:48,976][24592] Avg episode reward: [(0, '4.787')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:49,189][626795] Updated weights for policy 0, policy_version 358092 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:51,108][626795] Updated weights for policy 0, policy_version 358102 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:53,323][626795] Updated weights for policy 0, policy_version 358112 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:53,975][24592] Fps is (10 sec: 40140.5, 60 sec: 39048.8, 300 sec: 38016.5). Total num frames: 2933669888. Throughput: 0: 9878.4. Samples: 483417156. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:53,976][24592] Avg episode reward: [(0, '4.600')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:55,482][626795] Updated weights for policy 0, policy_version 358122 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:57,457][626795] Updated weights for policy 0, policy_version 358132 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:58,976][24592] Fps is (10 sec: 40140.3, 60 sec: 39048.5, 300 sec: 37988.6). Total num frames: 2933874688. Throughput: 0: 9820.0. Samples: 483446484. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:11:58,977][24592] Avg episode reward: [(0, '4.689')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:11:59,494][626795] Updated weights for policy 0, policy_version 358142 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:01,641][626795] Updated weights for policy 0, policy_version 358152 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:03,559][626795] Updated weights for policy 0, policy_version 358162 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:03,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39731.2, 300 sec: 38016.5). Total num frames: 2934079488. Throughput: 0: 9828.7. Samples: 483506508. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:03,976][24592] Avg episode reward: [(0, '4.827')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:05,612][626795] Updated weights for policy 0, policy_version 358172 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:07,608][626795] Updated weights for policy 0, policy_version 358182 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:09,031][24592] Fps is (10 sec: 36662.1, 60 sec: 39013.1, 300 sec: 37981.6). Total num frames: 2934243328. Throughput: 0: 9136.4. Samples: 483537630. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:09,032][24592] Avg episode reward: [(0, '4.501')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:10,581][626795] Updated weights for policy 0, policy_version 358192 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:12,536][626795] Updated weights for policy 0, policy_version 358202 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:13,975][24592] Fps is (10 sec: 36045.1, 60 sec: 38912.1, 300 sec: 37988.7). Total num frames: 2934439936. Throughput: 0: 9677.0. Samples: 483589680. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:13,976][24592] Avg episode reward: [(0, '4.714')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:14,756][626795] Updated weights for policy 0, policy_version 358212 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:16,691][626795] Updated weights for policy 0, policy_version 358222 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:18,570][626795] Updated weights for policy 0, policy_version 358232 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:18,975][24592] Fps is (10 sec: 41187.5, 60 sec: 39048.5, 300 sec: 37988.7). Total num frames: 2934652928. Throughput: 0: 9700.6. Samples: 483649722. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:18,976][24592] Avg episode reward: [(0, '4.864')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:20,515][626795] Updated weights for policy 0, policy_version 358242 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:22,489][626795] Updated weights for policy 0, policy_version 358252 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:23,975][24592] Fps is (10 sec: 41778.8, 60 sec: 39185.1, 300 sec: 37960.9). Total num frames: 2934857728. Throughput: 0: 9982.3. Samples: 483713160. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:23,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:24,479][626795] Updated weights for policy 0, policy_version 358262 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:26,470][626795] Updated weights for policy 0, policy_version 358272 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:28,242][626795] Updated weights for policy 0, policy_version 358282 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:28,975][24592] Fps is (10 sec: 41778.7, 60 sec: 39458.1, 300 sec: 37988.7). Total num frames: 2935070720. Throughput: 0: 9988.2. Samples: 483744348. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:28,976][24592] Avg episode reward: [(0, '4.666')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:30,356][626795] Updated weights for policy 0, policy_version 358292 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:32,459][626795] Updated weights for policy 0, policy_version 358302 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:33,975][24592] Fps is (10 sec: 40960.1, 60 sec: 39458.2, 300 sec: 37960.9). Total num frames: 2935267328. Throughput: 0: 9960.0. Samples: 483805554. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:33,976][24592] Avg episode reward: [(0, '4.665')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:34,463][626795] Updated weights for policy 0, policy_version 358312 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:36,498][626795] Updated weights for policy 0, policy_version 358322 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:38,334][626795] Updated weights for policy 0, policy_version 358332 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:38,977][24592] Fps is (10 sec: 40953.4, 60 sec: 40276.4, 300 sec: 38127.3). Total num frames: 2935480320. Throughput: 0: 10004.4. Samples: 483867372. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:38,979][24592] Avg episode reward: [(0, '4.763')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:40,502][626795] Updated weights for policy 0, policy_version 358342 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:43,278][626795] Updated weights for policy 0, policy_version 358352 (0.0007)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:43,976][24592] Fps is (10 sec: 37681.8, 60 sec: 39594.4, 300 sec: 38044.2). Total num frames: 2935644160. Throughput: 0: 9964.5. Samples: 483894888. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:43,977][24592] Avg episode reward: [(0, '4.820')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:45,326][626795] Updated weights for policy 0, policy_version 358362 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:47,223][626795] Updated weights for policy 0, policy_version 358372 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:48,975][24592] Fps is (10 sec: 36870.3, 60 sec: 39594.7, 300 sec: 38127.7). Total num frames: 2935848960. Throughput: 0: 9867.7. Samples: 483950556. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:48,977][24592] Avg episode reward: [(0, '4.407')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:49,264][626795] Updated weights for policy 0, policy_version 358382 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:51,250][626795] Updated weights for policy 0, policy_version 358392 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:53,306][626795] Updated weights for policy 0, policy_version 358402 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:53,976][24592] Fps is (10 sec: 40141.4, 60 sec: 39594.5, 300 sec: 38210.8). Total num frames: 2936045568. Throughput: 0: 10533.1. Samples: 484011042. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:53,976][24592] Avg episode reward: [(0, '4.627')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:55,372][626795] Updated weights for policy 0, policy_version 358412 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:57,318][626795] Updated weights for policy 0, policy_version 358422 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:58,975][24592] Fps is (10 sec: 40959.8, 60 sec: 39731.3, 300 sec: 38349.7). Total num frames: 2936258560. Throughput: 0: 10048.2. Samples: 484041852. Policy #0 lag: (min: 0.0, avg: 2.5, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:12:58,977][24592] Avg episode reward: [(0, '4.749')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:12:59,371][626795] Updated weights for policy 0, policy_version 358432 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:01,298][626795] Updated weights for policy 0, policy_version 358442 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:03,273][626795] Updated weights for policy 0, policy_version 358452 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:03,975][24592] Fps is (10 sec: 41780.1, 60 sec: 39731.2, 300 sec: 38405.2). Total num frames: 2936463360. Throughput: 0: 10095.7. Samples: 484104030. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:03,976][24592] Avg episode reward: [(0, '4.972')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:03,981][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000358455_2936463360.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:04,133][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000357324_2927198208.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:05,463][626795] Updated weights for policy 0, policy_version 358462 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:07,358][626795] Updated weights for policy 0, policy_version 358472 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:08,976][24592] Fps is (10 sec: 40140.4, 60 sec: 40314.3, 300 sec: 38405.2). Total num frames: 2936659968. Throughput: 0: 10020.2. Samples: 484164072. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:08,978][24592] Avg episode reward: [(0, '4.997')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:09,486][626795] Updated weights for policy 0, policy_version 358482 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:11,441][626795] Updated weights for policy 0, policy_version 358492 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:13,621][626795] Updated weights for policy 0, policy_version 358502 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:13,975][24592] Fps is (10 sec: 39321.7, 60 sec: 40277.3, 300 sec: 38544.1). Total num frames: 2936856576. Throughput: 0: 9999.5. Samples: 484194324. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:13,977][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:16,450][626795] Updated weights for policy 0, policy_version 358512 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:18,506][626795] Updated weights for policy 0, policy_version 358522 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:18,977][24592] Fps is (10 sec: 36040.8, 60 sec: 39457.3, 300 sec: 38432.8). Total num frames: 2937020416. Throughput: 0: 9765.3. Samples: 484245006. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:18,977][24592] Avg episode reward: [(0, '4.812')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:20,504][626795] Updated weights for policy 0, policy_version 358532 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:22,547][626795] Updated weights for policy 0, policy_version 358542 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:23,975][24592] Fps is (10 sec: 37683.2, 60 sec: 39594.7, 300 sec: 38488.5). Total num frames: 2937233408. Throughput: 0: 9769.4. Samples: 484306980. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:23,976][24592] Avg episode reward: [(0, '4.695')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:24,539][626795] Updated weights for policy 0, policy_version 358552 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:26,487][626795] Updated weights for policy 0, policy_version 358562 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:28,414][626795] Updated weights for policy 0, policy_version 358572 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:28,976][24592] Fps is (10 sec: 41782.8, 60 sec: 39457.9, 300 sec: 38488.5). Total num frames: 2937438208. Throughput: 0: 9830.4. Samples: 484337256. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:28,977][24592] Avg episode reward: [(0, '4.786')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:30,473][626795] Updated weights for policy 0, policy_version 358582 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:32,621][626795] Updated weights for policy 0, policy_version 358592 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:33,975][24592] Fps is (10 sec: 39321.3, 60 sec: 39321.6, 300 sec: 38460.8). Total num frames: 2937626624. Throughput: 0: 9907.1. Samples: 484396374. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:33,979][24592] Avg episode reward: [(0, '4.319')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:34,971][626795] Updated weights for policy 0, policy_version 358602 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:37,025][626795] Updated weights for policy 0, policy_version 358612 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:38,975][24592] Fps is (10 sec: 38503.4, 60 sec: 39049.6, 300 sec: 38460.7). Total num frames: 2937823232. Throughput: 0: 9867.8. Samples: 484455090. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:38,978][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:39,032][626795] Updated weights for policy 0, policy_version 358622 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:41,276][626795] Updated weights for policy 0, policy_version 358632 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:43,168][626795] Updated weights for policy 0, policy_version 358642 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:43,975][24592] Fps is (10 sec: 39322.0, 60 sec: 39595.0, 300 sec: 38599.6). Total num frames: 2938019840. Throughput: 0: 9808.3. Samples: 484483224. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:43,977][24592] Avg episode reward: [(0, '4.509')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:45,243][626795] Updated weights for policy 0, policy_version 358652 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:47,222][626795] Updated weights for policy 0, policy_version 358662 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:48,975][24592] Fps is (10 sec: 36864.5, 60 sec: 39048.5, 300 sec: 38655.1). Total num frames: 2938191872. Throughput: 0: 9783.6. Samples: 484544292. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:48,977][24592] Avg episode reward: [(0, '4.615')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:50,080][626795] Updated weights for policy 0, policy_version 358672 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:52,097][626795] Updated weights for policy 0, policy_version 358682 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:53,975][24592] Fps is (10 sec: 36863.8, 60 sec: 39048.7, 300 sec: 38655.1). Total num frames: 2938388480. Throughput: 0: 9601.0. Samples: 484596114. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:53,977][24592] Avg episode reward: [(0, '4.658')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:54,398][626795] Updated weights for policy 0, policy_version 358692 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:55,228][626772] Signal inference workers to stop experience collection... (6200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:55,230][626772] Signal inference workers to resume experience collection... (6200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:55,245][626795] InferenceWorker_p0-w0: stopping experience collection (6200 times)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:55,251][626795] InferenceWorker_p0-w0: resuming experience collection (6200 times)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:56,370][626795] Updated weights for policy 0, policy_version 358702 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:13:58,301][626795] Updated weights for policy 0, policy_version 358712 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:58,981][24592] Fps is (10 sec: 39297.5, 60 sec: 38771.5, 300 sec: 38626.6). Total num frames: 2938585088. Throughput: 0: 9575.1. Samples: 484625262. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:13:58,982][24592] Avg episode reward: [(0, '4.709')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:00,667][626795] Updated weights for policy 0, policy_version 358722 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:02,504][626795] Updated weights for policy 0, policy_version 358732 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:03,975][24592] Fps is (10 sec: 40140.5, 60 sec: 38775.4, 300 sec: 38627.4). Total num frames: 2938789888. Throughput: 0: 9779.2. Samples: 484685058. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:03,976][24592] Avg episode reward: [(0, '4.630')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:04,534][626795] Updated weights for policy 0, policy_version 358742 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:06,408][626795] Updated weights for policy 0, policy_version 358752 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:08,490][626795] Updated weights for policy 0, policy_version 358762 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:08,975][24592] Fps is (10 sec: 40985.5, 60 sec: 38912.2, 300 sec: 38627.4). Total num frames: 2938994688. Throughput: 0: 9794.4. Samples: 484747728. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:08,976][24592] Avg episode reward: [(0, '4.795')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:10,508][626795] Updated weights for policy 0, policy_version 358772 (0.0007)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:12,632][626795] Updated weights for policy 0, policy_version 358782 (0.0027)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:13,975][24592] Fps is (10 sec: 40140.7, 60 sec: 38911.9, 300 sec: 38627.4). Total num frames: 2939191296. Throughput: 0: 9796.9. Samples: 484778112. Policy #0 lag: (min: 0.0, avg: 2.1, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:13,977][24592] Avg episode reward: [(0, '4.568')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:14,631][626795] Updated weights for policy 0, policy_version 358792 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:16,795][626795] Updated weights for policy 0, policy_version 358802 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:18,715][626795] Updated weights for policy 0, policy_version 358812 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:18,976][24592] Fps is (10 sec: 40136.7, 60 sec: 39594.9, 300 sec: 38877.2). Total num frames: 2939396096. Throughput: 0: 9765.4. Samples: 484835826. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:18,977][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:21,951][626795] Updated weights for policy 0, policy_version 358822 (0.0025)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:23,925][626795] Updated weights for policy 0, policy_version 358832 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:23,976][24592] Fps is (10 sec: 36044.0, 60 sec: 38638.7, 300 sec: 38766.3). Total num frames: 2939551744. Throughput: 0: 9574.2. Samples: 484885932. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:23,977][24592] Avg episode reward: [(0, '4.445')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:26,356][626795] Updated weights for policy 0, policy_version 358842 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:28,531][626795] Updated weights for policy 0, policy_version 358852 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:28,976][24592] Fps is (10 sec: 32770.0, 60 sec: 38092.8, 300 sec: 38682.9). Total num frames: 2939723776. Throughput: 0: 9523.5. Samples: 484911786. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:28,976][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:30,695][626795] Updated weights for policy 0, policy_version 358862 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:32,857][626795] Updated weights for policy 0, policy_version 358872 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:33,976][24592] Fps is (10 sec: 37683.3, 60 sec: 38365.7, 300 sec: 38682.9). Total num frames: 2939928576. Throughput: 0: 9432.1. Samples: 484968738. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:33,977][24592] Avg episode reward: [(0, '4.639')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:34,899][626795] Updated weights for policy 0, policy_version 358882 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:36,936][626795] Updated weights for policy 0, policy_version 358892 (0.0006)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:38,929][626795] Updated weights for policy 0, policy_version 358902 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:38,975][24592] Fps is (10 sec: 40142.1, 60 sec: 38365.9, 300 sec: 38682.9). Total num frames: 2940125184. Throughput: 0: 9611.6. Samples: 485028636. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:38,977][24592] Avg episode reward: [(0, '4.694')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:40,949][626795] Updated weights for policy 0, policy_version 358912 (0.0022)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:42,939][626795] Updated weights for policy 0, policy_version 358922 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:43,976][24592] Fps is (10 sec: 40140.7, 60 sec: 38502.2, 300 sec: 38738.4). Total num frames: 2940329984. Throughput: 0: 9662.8. Samples: 485060034. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:43,979][24592] Avg episode reward: [(0, '4.549')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:45,259][626795] Updated weights for policy 0, policy_version 358932 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:47,281][626795] Updated weights for policy 0, policy_version 358942 (0.0013)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:48,975][24592] Fps is (10 sec: 39321.5, 60 sec: 38775.4, 300 sec: 38710.8). Total num frames: 2940518400. Throughput: 0: 9612.4. Samples: 485117616. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:48,976][24592] Avg episode reward: [(0, '4.636')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:49,266][626795] Updated weights for policy 0, policy_version 358952 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:51,341][626795] Updated weights for policy 0, policy_version 358962 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:53,534][626795] Updated weights for policy 0, policy_version 358972 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:54,691][24592] Fps is (10 sec: 35168.9, 60 sec: 38183.8, 300 sec: 38811.0). Total num frames: 2940706816. Throughput: 0: 9387.3. Samples: 485176872. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:54,693][24592] Avg episode reward: [(0, '4.843')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:56,514][626795] Updated weights for policy 0, policy_version 358982 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:14:58,584][626795] Updated weights for policy 0, policy_version 358992 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:58,975][24592] Fps is (10 sec: 36044.7, 60 sec: 38233.2, 300 sec: 38849.5). Total num frames: 2940878848. Throughput: 0: 9308.8. Samples: 485197008. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:14:58,976][24592] Avg episode reward: [(0, '4.487')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:00,664][626795] Updated weights for policy 0, policy_version 359002 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:02,621][626795] Updated weights for policy 0, policy_version 359012 (0.0024)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:03,975][24592] Fps is (10 sec: 39704.0, 60 sec: 38092.9, 300 sec: 38960.6). Total num frames: 2941075456. Throughput: 0: 9371.8. Samples: 485257548. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:03,986][24592] Avg episode reward: [(0, '4.422')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:03,995][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000359018_2941075456.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:04,150][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000357873_2931695616.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:04,900][626795] Updated weights for policy 0, policy_version 359022 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:06,842][626795] Updated weights for policy 0, policy_version 359032 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:08,920][626795] Updated weights for policy 0, policy_version 359042 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:08,975][24592] Fps is (10 sec: 39321.9, 60 sec: 37956.2, 300 sec: 38988.4). Total num frames: 2941272064. Throughput: 0: 9560.6. Samples: 485316156. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:08,977][24592] Avg episode reward: [(0, '4.640')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:10,900][626795] Updated weights for policy 0, policy_version 359052 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:12,920][626795] Updated weights for policy 0, policy_version 359062 (0.0017)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:13,975][24592] Fps is (10 sec: 40959.9, 60 sec: 38229.4, 300 sec: 39043.9). Total num frames: 2941485056. Throughput: 0: 9676.3. Samples: 485347218. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:13,977][24592] Avg episode reward: [(0, '4.383')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:14,746][626795] Updated weights for policy 0, policy_version 359072 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:16,854][626795] Updated weights for policy 0, policy_version 359082 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:18,797][626795] Updated weights for policy 0, policy_version 359092 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:18,975][24592] Fps is (10 sec: 40959.4, 60 sec: 38093.3, 300 sec: 39099.4). Total num frames: 2941681664. Throughput: 0: 9778.2. Samples: 485408754. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:18,977][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:20,758][626795] Updated weights for policy 0, policy_version 359102 (0.0018)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:22,698][626795] Updated weights for policy 0, policy_version 359112 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:23,976][24592] Fps is (10 sec: 40958.9, 60 sec: 39048.6, 300 sec: 39276.1). Total num frames: 2941894656. Throughput: 0: 9843.8. Samples: 485471610. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:23,977][24592] Avg episode reward: [(0, '4.755')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:24,733][626795] Updated weights for policy 0, policy_version 359122 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:26,666][626795] Updated weights for policy 0, policy_version 359132 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:28,975][24592] Fps is (10 sec: 37683.6, 60 sec: 38912.2, 300 sec: 39127.2). Total num frames: 2942058496. Throughput: 0: 9836.1. Samples: 485502654. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:28,976][24592] Avg episode reward: [(0, '4.730')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:29,663][626795] Updated weights for policy 0, policy_version 359142 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:31,554][626795] Updated weights for policy 0, policy_version 359152 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:33,494][626795] Updated weights for policy 0, policy_version 359162 (0.0020)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:33,975][24592] Fps is (10 sec: 37684.2, 60 sec: 39048.7, 300 sec: 39155.0). Total num frames: 2942271488. Throughput: 0: 9720.8. Samples: 485555052. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:33,976][24592] Avg episode reward: [(0, '4.569')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:35,527][626795] Updated weights for policy 0, policy_version 359172 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:37,468][626795] Updated weights for policy 0, policy_version 359182 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:38,975][24592] Fps is (10 sec: 41779.4, 60 sec: 39185.1, 300 sec: 39155.9). Total num frames: 2942476288. Throughput: 0: 9946.8. Samples: 485617362. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:38,976][24592] Avg episode reward: [(0, '4.621')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:39,570][626795] Updated weights for policy 0, policy_version 359192 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:41,446][626795] Updated weights for policy 0, policy_version 359202 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:43,431][626795] Updated weights for policy 0, policy_version 359212 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:43,977][24592] Fps is (10 sec: 40953.4, 60 sec: 39184.2, 300 sec: 39127.2). Total num frames: 2942681088. Throughput: 0: 10033.0. Samples: 485648508. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:43,980][24592] Avg episode reward: [(0, '4.562')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:45,337][626795] Updated weights for policy 0, policy_version 359222 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:47,426][626795] Updated weights for policy 0, policy_version 359232 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:48,975][24592] Fps is (10 sec: 40959.9, 60 sec: 39458.1, 300 sec: 39182.8). Total num frames: 2942885888. Throughput: 0: 10065.5. Samples: 485710494. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:48,977][24592] Avg episode reward: [(0, '4.678')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:49,471][626795] Updated weights for policy 0, policy_version 359242 (0.0021)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:51,518][626795] Updated weights for policy 0, policy_version 359252 (0.0008)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:53,434][626795] Updated weights for policy 0, policy_version 359262 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:53,975][24592] Fps is (10 sec: 40966.9, 60 sec: 40210.6, 300 sec: 39182.8). Total num frames: 2943090688. Throughput: 0: 10108.4. Samples: 485771034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:53,978][24592] Avg episode reward: [(0, '4.682')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:55,762][626795] Updated weights for policy 0, policy_version 359272 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:15:57,721][626795] Updated weights for policy 0, policy_version 359282 (0.0029)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:58,977][24592] Fps is (10 sec: 39313.7, 60 sec: 40003.0, 300 sec: 39265.8). Total num frames: 2943279104. Throughput: 0: 10068.6. Samples: 485800326. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:15:58,980][24592] Avg episode reward: [(0, '4.544')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:00,003][626795] Updated weights for policy 0, policy_version 359292 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:02,902][626795] Updated weights for policy 0, policy_version 359302 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:03,975][24592] Fps is (10 sec: 33586.6, 60 sec: 39185.0, 300 sec: 39071.8). Total num frames: 2943426560. Throughput: 0: 9777.5. Samples: 485848740. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:03,978][24592] Avg episode reward: [(0, '4.581')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:05,660][626795] Updated weights for policy 0, policy_version 359312 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:07,802][626795] Updated weights for policy 0, policy_version 359322 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:08,976][24592] Fps is (10 sec: 31954.4, 60 sec: 38775.3, 300 sec: 38960.6). Total num frames: 2943598592. Throughput: 0: 9498.3. Samples: 485899032. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:08,976][24592] Avg episode reward: [(0, '4.589')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:10,396][626795] Updated weights for policy 0, policy_version 359332 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:12,183][626795] Updated weights for policy 0, policy_version 359342 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:13,975][24592] Fps is (10 sec: 36864.6, 60 sec: 38502.4, 300 sec: 38932.8). Total num frames: 2943795200. Throughput: 0: 9430.7. Samples: 485927034. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:13,977][24592] Avg episode reward: [(0, '4.620')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:14,252][626795] Updated weights for policy 0, policy_version 359352 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:16,259][626795] Updated weights for policy 0, policy_version 359362 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:18,174][626795] Updated weights for policy 0, policy_version 359372 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:18,976][24592] Fps is (10 sec: 40140.4, 60 sec: 38638.8, 300 sec: 38960.6). Total num frames: 2944000000. Throughput: 0: 9642.9. Samples: 485988984. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:18,979][24592] Avg episode reward: [(0, '4.574')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:20,124][626795] Updated weights for policy 0, policy_version 359382 (0.0019)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:22,149][626795] Updated weights for policy 0, policy_version 359392 (0.0014)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:23,975][24592] Fps is (10 sec: 41779.2, 60 sec: 38639.1, 300 sec: 39016.1). Total num frames: 2944212992. Throughput: 0: 9619.7. Samples: 486050250. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:23,978][24592] Avg episode reward: [(0, '4.481')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:24,259][626795] Updated weights for policy 0, policy_version 359402 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:26,373][626795] Updated weights for policy 0, policy_version 359412 (0.0028)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:28,472][626795] Updated weights for policy 0, policy_version 359422 (0.0006)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:28,975][24592] Fps is (10 sec: 40142.4, 60 sec: 39048.6, 300 sec: 38988.4). Total num frames: 2944401408. Throughput: 0: 9562.8. Samples: 486078816. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:28,979][24592] Avg episode reward: [(0, '4.653')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:30,506][626795] Updated weights for policy 0, policy_version 359432 (0.0017)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:32,617][626795] Updated weights for policy 0, policy_version 359442 (0.0028)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:34,071][24592] Fps is (10 sec: 34891.2, 60 sec: 38168.4, 300 sec: 38975.7). Total num frames: 2944565248. Throughput: 0: 9495.7. Samples: 486138708. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:34,073][24592] Avg episode reward: [(0, '4.646')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:35,739][626795] Updated weights for policy 0, policy_version 359452 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:37,734][626795] Updated weights for policy 0, policy_version 359462 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:38,976][24592] Fps is (10 sec: 35224.5, 60 sec: 37956.1, 300 sec: 38932.8). Total num frames: 2944753664. Throughput: 0: 9265.8. Samples: 486187998. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:38,978][24592] Avg episode reward: [(0, '4.746')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:39,964][626795] Updated weights for policy 0, policy_version 359472 (0.0020)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:41,948][626795] Updated weights for policy 0, policy_version 359482 (0.0022)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:43,975][24592] Fps is (10 sec: 39701.9, 60 sec: 37957.3, 300 sec: 38932.8). Total num frames: 2944958464. Throughput: 0: 9272.8. Samples: 486217584. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:43,977][24592] Avg episode reward: [(0, '4.443')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:43,979][626795] Updated weights for policy 0, policy_version 359492 (0.0011)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:46,003][626795] Updated weights for policy 0, policy_version 359502 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:48,098][626795] Updated weights for policy 0, policy_version 359512 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:48,975][24592] Fps is (10 sec: 40961.1, 60 sec: 37956.3, 300 sec: 38960.6). Total num frames: 2945163264. Throughput: 0: 9536.3. Samples: 486277872. Policy #0 lag: (min: 0.0, avg: 2.3, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:48,977][24592] Avg episode reward: [(0, '4.473')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:49,897][626795] Updated weights for policy 0, policy_version 359522 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:51,847][626795] Updated weights for policy 0, policy_version 359532 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:53,868][626795] Updated weights for policy 0, policy_version 359542 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:53,975][24592] Fps is (10 sec: 40959.9, 60 sec: 37956.2, 300 sec: 38960.6). Total num frames: 2945368064. Throughput: 0: 9803.3. Samples: 486340176. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:53,976][24592] Avg episode reward: [(0, '4.783')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:55,977][626795] Updated weights for policy 0, policy_version 359552 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:16:58,052][626795] Updated weights for policy 0, policy_version 359562 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:58,975][24592] Fps is (10 sec: 40140.5, 60 sec: 38094.0, 300 sec: 38932.8). Total num frames: 2945564672. Throughput: 0: 9817.7. Samples: 486368832. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:16:58,977][24592] Avg episode reward: [(0, '4.457')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:00,166][626795] Updated weights for policy 0, policy_version 359572 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:02,040][626795] Updated weights for policy 0, policy_version 359582 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:03,976][24592] Fps is (10 sec: 40140.4, 60 sec: 39048.5, 300 sec: 39079.0). Total num frames: 2945769472. Throughput: 0: 9797.4. Samples: 486429864. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:03,977][24592] Avg episode reward: [(0, '4.740')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:03,980][626772] Saving /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000359591_2945769472.pth...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:04,116][626772] Removing /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000358455_2936463360.pth\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:04,328][626795] Updated weights for policy 0, policy_version 359592 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:07,277][626795] Updated weights for policy 0, policy_version 359602 (0.0012)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:08,975][24592] Fps is (10 sec: 36045.2, 60 sec: 38775.7, 300 sec: 38932.8). Total num frames: 2945925120. Throughput: 0: 9543.1. Samples: 486479688. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:08,978][24592] Avg episode reward: [(0, '4.674')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:09,239][626795] Updated weights for policy 0, policy_version 359612 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:11,271][626795] Updated weights for policy 0, policy_version 359622 (0.0015)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:13,339][626795] Updated weights for policy 0, policy_version 359632 (0.0011)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:13,975][24592] Fps is (10 sec: 35226.0, 60 sec: 38775.4, 300 sec: 38877.3). Total num frames: 2946121728. Throughput: 0: 9574.5. Samples: 486509670. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:13,977][24592] Avg episode reward: [(0, '4.603')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:15,477][626795] Updated weights for policy 0, policy_version 359642 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:17,552][626795] Updated weights for policy 0, policy_version 359652 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:18,975][24592] Fps is (10 sec: 39321.4, 60 sec: 38639.2, 300 sec: 38849.5). Total num frames: 2946318336. Throughput: 0: 9597.4. Samples: 486569670. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:18,979][24592] Avg episode reward: [(0, '4.888')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:19,826][626795] Updated weights for policy 0, policy_version 359662 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:21,911][626795] Updated weights for policy 0, policy_version 359672 (0.0014)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:23,918][626795] Updated weights for policy 0, policy_version 359682 (0.0015)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:23,975][24592] Fps is (10 sec: 39321.3, 60 sec: 38365.8, 300 sec: 38794.0). Total num frames: 2946514944. Throughput: 0: 9760.6. Samples: 486627222. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:23,977][24592] Avg episode reward: [(0, '4.881')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:26,119][626795] Updated weights for policy 0, policy_version 359692 (0.0013)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:27,990][626795] Updated weights for policy 0, policy_version 359702 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:28,975][24592] Fps is (10 sec: 40140.8, 60 sec: 38638.9, 300 sec: 38821.8). Total num frames: 2946719744. Throughput: 0: 9741.3. Samples: 486655944. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:28,977][24592] Avg episode reward: [(0, '4.655')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:30,145][626795] Updated weights for policy 0, policy_version 359712 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:32,161][626795] Updated weights for policy 0, policy_version 359722 (0.0019)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:33,975][24592] Fps is (10 sec: 40140.9, 60 sec: 39247.7, 300 sec: 38766.4). Total num frames: 2946916352. Throughput: 0: 9750.1. Samples: 486716628. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:33,977][24592] Avg episode reward: [(0, '4.829')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:34,188][626795] Updated weights for policy 0, policy_version 359732 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:36,181][626795] Updated weights for policy 0, policy_version 359742 (0.0016)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:38,118][626795] Updated weights for policy 0, policy_version 359752 (0.0008)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:38,975][24592] Fps is (10 sec: 40140.8, 60 sec: 39458.3, 300 sec: 38905.1). Total num frames: 2947121152. Throughput: 0: 9711.1. Samples: 486777174. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:38,977][24592] Avg episode reward: [(0, '4.721')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:41,010][626795] Updated weights for policy 0, policy_version 359762 (0.0009)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:43,051][626795] Updated weights for policy 0, policy_version 359772 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:43,975][24592] Fps is (10 sec: 36864.2, 60 sec: 38775.5, 300 sec: 38766.2). Total num frames: 2947284992. Throughput: 0: 9562.7. Samples: 486799152. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:43,976][24592] Avg episode reward: [(0, '4.583')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:45,155][626795] Updated weights for policy 0, policy_version 359782 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:47,138][626795] Updated weights for policy 0, policy_version 359792 (0.0016)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:48,975][24592] Fps is (10 sec: 36044.4, 60 sec: 38638.8, 300 sec: 38766.2). Total num frames: 2947481600. Throughput: 0: 9543.7. Samples: 486859332. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:48,977][24592] Avg episode reward: [(0, '4.792')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:49,203][626795] Updated weights for policy 0, policy_version 359802 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:51,289][626795] Updated weights for policy 0, policy_version 359812 (0.0010)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:53,232][626795] Updated weights for policy 0, policy_version 359822 (0.0010)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:53,975][24592] Fps is (10 sec: 39321.7, 60 sec: 38502.4, 300 sec: 38710.7). Total num frames: 2947678208. Throughput: 0: 9768.5. Samples: 486919272. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:53,979][24592] Avg episode reward: [(0, '4.638')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:55,505][626795] Updated weights for policy 0, policy_version 359832 (0.0012)\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:57,495][626795] Updated weights for policy 0, policy_version 359842 (0.0009)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:58,976][24592] Fps is (10 sec: 39319.8, 60 sec: 38502.1, 300 sec: 38682.8). Total num frames: 2947874816. Throughput: 0: 9754.5. Samples: 486948630. Policy #0 lag: (min: 0.0, avg: 2.4, max: 5.0)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 21:17:58,979][24592] Avg episode reward: [(0, '4.828')]\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 21:17:59,804][626795] Updated weights for policy 0, policy_version 359852 (0.0027)\u001b[0m\n"
+ ]
+ }
+ ],
"source": [
"## Start the training, this should take around 15 minutes\n",
"register_vizdoom_components()\n",
@@ -575,7 +44829,28 @@
"# The scenario we train on today is health gathering\n",
"# other scenarios include \"doom_basic\", \"doom_two_colors_easy\", \"doom_dm\", \"doom_dwango5\", \"doom_my_way_home\", \"doom_deadly_corridor\", \"doom_defend_the_center\", \"doom_defend_the_line\"\n",
"env = \"doom_health_gathering_supreme\"\n",
- "cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=8\", \"--num_envs_per_worker=4\", \"--train_for_env_steps=4000000\"])\n",
+ "cfg = parse_vizdoom_cfg(argv=[\n",
+ " f\"--env={env}\", \n",
+ " \"--num_workers=20\", \n",
+ " \"--num_envs_per_worker=12\", \n",
+ " \"--train_for_env_steps=10000000000\", \n",
+ " '--train_for_seconds=3600000',\n",
+ " '--algo=APPO',\n",
+ " '--gamma=0.99',\n",
+ " '--use_rnn=True', \n",
+ " '--num_epochs=1',\n",
+ " '--rollout=32',\n",
+ " '--recurrence=32',\n",
+ " '--batch_size=2048',\n",
+ " '--benchmark=False',\n",
+ " '--max_grad_norm=0.0',\n",
+ " '--decorrelate_experience_max_seconds=1',\n",
+ " '--nonlinearity=relu',\n",
+ " '--rnn_type=lstm',\n",
+ " '--num_policies=1',\n",
+ " '--heartbeat_reporting_interval=300',\n",
+ " '--seed=6666',\n",
+ " ])\n",
"\n",
"status = run_rl(cfg)"
]
@@ -591,13 +44866,212 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"metadata": {
"id": "MGSA4Kg5_i0j"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33m[2025-04-17 08:18:27,967][24592] Environment doom_basic already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,978][24592] Environment doom_two_colors_easy already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,979][24592] Environment doom_two_colors_hard already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,979][24592] Environment doom_dm already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,980][24592] Environment doom_dwango5 already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,980][24592] Environment doom_my_way_home_flat_actions already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,981][24592] Environment doom_defend_the_center_flat_actions already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,981][24592] Environment doom_my_way_home already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,982][24592] Environment doom_deadly_corridor already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,983][24592] Environment doom_defend_the_center already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,983][24592] Environment doom_defend_the_line already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,984][24592] Environment doom_health_gathering already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,984][24592] Environment doom_health_gathering_supreme already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,985][24592] Environment doom_battle already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,985][24592] Environment doom_battle2 already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,986][24592] Environment doom_duel_bots already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,987][24592] Environment doom_deathmatch_bots already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,987][24592] Environment doom_duel already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,988][24592] Environment doom_deathmatch_full already registered, overwriting...\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:27,988][24592] Environment doom_benchmark already registered, overwriting...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:27,989][24592] register_encoder_factory: \u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:28,126][24592] Loading existing experiment configuration from /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/config.json\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,127][24592] Overriding arg 'num_workers' with value 1 passed from command line\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,127][24592] Adding new argument 'no_render'=True that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,128][24592] Adding new argument 'save_video'=True that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,129][24592] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,129][24592] Adding new argument 'video_name'=None that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,130][24592] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,130][24592] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,131][24592] Adding new argument 'push_to_hub'=False that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,131][24592] Adding new argument 'hf_repository'=None that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,132][24592] Adding new argument 'policy_index'=0 that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,133][24592] Adding new argument 'eval_deterministic'=False that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,133][24592] Adding new argument 'train_script'=None that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,134][24592] Adding new argument 'enjoy_script'=None that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,134][24592] Adding new argument 'sample_env_episodes'=256 that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,135][24592] Adding new argument 'csv_folder_name'=None that is not in the saved config file!\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,135][24592] Using frameskip 1 and render_action_repeat=4 for evaluation\u001b[0m\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.num_agents to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.num_agents` for environment variables or `env.get_wrapper_attr('num_agents')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "/root/Lab/ppo-implementation-details/venv39/lib/python3.9/site-packages/gymnasium/core.py:311: UserWarning: \u001b[33mWARN: env.is_multiagent to get variables from other wrappers is deprecated and will be removed in v1.0, to get this variable you can do `env.unwrapped.is_multiagent` for environment variables or `env.get_wrapper_attr('is_multiagent')` that will search the reminding wrappers.\u001b[0m\n",
+ " logger.warn(\n",
+ "\u001b[36m[2025-04-17 08:18:28,204][24592] RunningMeanStd input shape: (3, 72, 128)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,213][24592] RunningMeanStd input shape: (1,)\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,339][24592] ConvEncoder: input_channels=3\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,656][24592] Conv encoder output size: 512\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:28,657][24592] Policy head output size: 512\u001b[0m\n",
+ "\u001b[33m[2025-04-17 08:18:28,802][24592] Loading state from checkpoint /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122072_1000013824.pth...\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ">>>>>\n",
+ "['/root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000121927_998825984.pth', '/root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/checkpoint_p0/checkpoint_000122072_1000013824.pth']\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m[2025-04-17 08:18:30,292][24592] Num frames 100...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,356][24592] Num frames 200...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,418][24592] Num frames 300...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,478][24592] Num frames 400...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:30,576][24592] Avg episode rewards: #0: 6.800, true rewards: #0: 4.800\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:30,577][24592] Avg episode reward: 6.800, avg true_objective: 4.800\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,590][24592] Num frames 500...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,649][24592] Num frames 600...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,706][24592] Num frames 700...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,763][24592] Num frames 800...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:30,852][24592] Avg episode rewards: #0: 5.320, true rewards: #0: 4.320\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:30,853][24592] Avg episode reward: 5.320, avg true_objective: 4.320\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,875][24592] Num frames 900...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,937][24592] Num frames 1000...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:30,995][24592] Num frames 1100...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,057][24592] Num frames 1200...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,140][24592] Avg episode rewards: #0: 4.827, true rewards: #0: 4.160\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,141][24592] Avg episode reward: 4.827, avg true_objective: 4.160\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,174][24592] Num frames 1300...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,234][24592] Num frames 1400...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,295][24592] Num frames 1500...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,356][24592] Num frames 1600...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,427][24592] Avg episode rewards: #0: 4.580, true rewards: #0: 4.080\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,428][24592] Avg episode reward: 4.580, avg true_objective: 4.080\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,470][24592] Num frames 1700...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,530][24592] Num frames 1800...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,589][24592] Num frames 1900...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,647][24592] Num frames 2000...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,727][24592] Avg episode rewards: #0: 4.696, true rewards: #0: 4.096\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:31,728][24592] Avg episode reward: 4.696, avg true_objective: 4.096\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,763][24592] Num frames 2100...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,827][24592] Num frames 2200...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,888][24592] Num frames 2300...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:31,947][24592] Num frames 2400...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,021][24592] Avg episode rewards: #0: 4.553, true rewards: #0: 4.053\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,021][24592] Avg episode reward: 4.553, avg true_objective: 4.053\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,067][24592] Num frames 2500...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,127][24592] Num frames 2600...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,187][24592] Num frames 2700...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,248][24592] Num frames 2800...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,328][24592] Avg episode rewards: #0: 4.783, true rewards: #0: 4.069\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,329][24592] Avg episode reward: 4.783, avg true_objective: 4.069\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,366][24592] Num frames 2900...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,425][24592] Num frames 3000...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,480][24592] Num frames 3100...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,534][24592] Num frames 3200...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,605][24592] Avg episode rewards: #0: 4.665, true rewards: #0: 4.040\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,606][24592] Avg episode reward: 4.665, avg true_objective: 4.040\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,645][24592] Num frames 3300...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,700][24592] Num frames 3400...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,757][24592] Num frames 3500...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,814][24592] Num frames 3600...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,882][24592] Num frames 3700...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,962][24592] Avg episode rewards: #0: 5.049, true rewards: #0: 4.160\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:32,963][24592] Avg episode reward: 5.049, avg true_objective: 4.160\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:32,999][24592] Num frames 3800...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:33,064][24592] Num frames 3900...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:33,124][24592] Num frames 4000...\u001b[0m\n",
+ "\u001b[36m[2025-04-17 08:18:33,189][24592] Num frames 4100...\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:33,299][24592] Avg episode rewards: #0: 5.092, true rewards: #0: 4.192\u001b[0m\n",
+ "\u001b[37m\u001b[1m[2025-04-17 08:18:33,300][24592] Avg episode reward: 5.092, avg true_objective: 4.192\u001b[0m\n",
+ "ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\n",
+ " built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\n",
+ " configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\n",
+ " libavutil 56. 70.100 / 56. 70.100\n",
+ " libavcodec 58.134.100 / 58.134.100\n",
+ " libavformat 58. 76.100 / 58. 76.100\n",
+ " libavdevice 58. 13.100 / 58. 13.100\n",
+ " libavfilter 7.110.100 / 7.110.100\n",
+ " libswscale 5. 9.100 / 5. 9.100\n",
+ " libswresample 3. 9.100 / 3. 9.100\n",
+ " libpostproc 55. 9.100 / 55. 9.100\n",
+ "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/tmp/sf2_root/replay.mp4':\n",
+ " Metadata:\n",
+ " major_brand : isom\n",
+ " minor_version : 512\n",
+ " compatible_brands: isomiso2mp41\n",
+ " encoder : Lavf59.27.100\n",
+ " Duration: 00:02:00.06, start: 0.000000, bitrate: 683 kb/s\n",
+ " Stream #0:0(und): Video: mpeg4 (Simple Profile) (mp4v / 0x7634706D), yuv420p, 240x180 [SAR 1:1 DAR 4:3], 681 kb/s, 35 fps, 35 tbr, 17920 tbn, 35 tbc (default)\n",
+ " Metadata:\n",
+ " handler_name : VideoHandler\n",
+ " vendor_id : [0][0][0][0]\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (mpeg4 (native) -> h264 (libx264))\n",
+ "Press [q] to stop, [?] for help\n",
+ "[libx264 @ 0x5616dd8b64c0] using SAR=1/1\n",
+ "[libx264 @ 0x5616dd8b64c0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2\n",
+ "[libx264 @ 0x5616dd8b64c0] profile High, level 1.3, 4:2:0, 8-bit\n",
+ "[libx264 @ 0x5616dd8b64c0] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=6 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n",
+ "Output #0, mp4, to '/root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/replay.mp4':\n",
+ " Metadata:\n",
+ " major_brand : isom\n",
+ " minor_version : 512\n",
+ " compatible_brands: isomiso2mp41\n",
+ " encoder : Lavf58.76.100\n",
+ " Stream #0:0(und): Video: h264 (avc1 / 0x31637661), yuv420p(progressive), 240x180 [SAR 1:1 DAR 4:3], q=2-31, 35 fps, 17920 tbn (default)\n",
+ " Metadata:\n",
+ " handler_name : VideoHandler\n",
+ " vendor_id : [0][0][0][0]\n",
+ " encoder : Lavc58.134.100 libx264\n",
+ " Side data:\n",
+ " cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A\n",
+ "frame= 4202 fps=2113 q=-1.0 Lsize= 3703kB time=00:01:59.97 bitrate= 252.8kbits/s speed=60.3x \n",
+ "video:3667kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.982092%\n",
+ "[libx264 @ 0x5616dd8b64c0] frame I:29 Avg QP:22.22 size: 4176\n",
+ "[libx264 @ 0x5616dd8b64c0] frame P:2684 Avg QP:24.07 size: 877\n",
+ "[libx264 @ 0x5616dd8b64c0] frame B:1489 Avg QP:27.28 size: 859\n",
+ "[libx264 @ 0x5616dd8b64c0] consecutive B-frames: 48.0% 11.5% 7.8% 32.7%\n",
+ "[libx264 @ 0x5616dd8b64c0] mb I I16..4: 11.1% 79.6% 9.3%\n",
+ "[libx264 @ 0x5616dd8b64c0] mb P I16..4: 2.1% 9.3% 1.3% P16..4: 50.7% 12.5% 4.6% 0.0% 0.0% skip:19.5%\n",
+ "[libx264 @ 0x5616dd8b64c0] mb B I16..4: 0.4% 3.5% 1.3% B16..8: 38.3% 11.6% 3.2% direct: 3.7% skip:38.1% L0:52.1% L1:38.7% BI: 9.2%\n",
+ "[libx264 @ 0x5616dd8b64c0] 8x8 transform intra:72.4% inter:78.0%\n",
+ "[libx264 @ 0x5616dd8b64c0] coded y,uvDC,uvAC intra: 59.3% 45.4% 23.8% inter: 21.2% 9.9% 1.3%\n",
+ "[libx264 @ 0x5616dd8b64c0] i16 v,h,dc,p: 71% 7% 19% 3%\n",
+ "[libx264 @ 0x5616dd8b64c0] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 33% 12% 33% 4% 3% 3% 4% 3% 5%\n",
+ "[libx264 @ 0x5616dd8b64c0] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 61% 9% 11% 3% 4% 2% 5% 2% 3%\n",
+ "[libx264 @ 0x5616dd8b64c0] i8c dc,h,v,p: 66% 15% 18% 1%\n",
+ "[libx264 @ 0x5616dd8b64c0] Weighted P-Frames: Y:1.9% UV:0.5%\n",
+ "[libx264 @ 0x5616dd8b64c0] ref P L0: 76.2% 10.7% 9.2% 3.7% 0.2%\n",
+ "[libx264 @ 0x5616dd8b64c0] ref B L0: 87.3% 9.9% 2.8%\n",
+ "[libx264 @ 0x5616dd8b64c0] ref B L1: 96.5% 3.5%\n",
+ "[libx264 @ 0x5616dd8b64c0] kb/s:250.16\n",
+ "\u001b[36m[2025-04-17 08:18:37,900][24592] Replay video saved to /root/Lab/ppo-implementation-details/unit8.2/train_dir/default_experiment/replay.mp4!\u001b[0m\n"
+ ]
+ }
+ ],
"source": [
"from sample_factory.enjoy import enjoy\n",
+ "\n",
+ "env = \"doom_health_gathering_supreme\"\n",
+ "\n",
+ "## Start the training, this should take around 15 minutes\n",
+ "register_vizdoom_components()\n",
+ "\n",
"cfg = parse_vizdoom_cfg(argv=[f\"--env={env}\", \"--num_workers=1\", \"--save_video\", \"--no_render\", \"--max_num_episodes=10\"], evaluation=True)\n",
"status = enjoy(cfg)"
]
@@ -621,8 +45095,8 @@
"source": [
"from base64 import b64encode\n",
"from IPython.display import HTML\n",
- "\n",
- "mp4 = open('/content/train_dir/default_experiment/replay.mp4','rb').read()\n",
+ " \n",
+ "mp4 = open('./train_dir/default_experiment/replay.mp4','rb').read()\n",
"data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
"HTML(\"\"\"\n",
"