commit e3b481f74e4b345854ebd6db657ec40a5aac18d3
Author: Jake Walker <jacob.walker@qinetiq.com>
Date:   Wed Oct 25 13:53:43 2023 +0100

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a3b5645
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,259 @@
+### JupyterNotebooks ###
+# gitignore template for Jupyter Notebooks
+# website: http://jupyter.org/
+
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# IPython
+profile_default/
+ipython_config.py
+
+# Remove previous ipynb_checkpoints
+#   git rm -r .ipynb_checkpoints/
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+
+# IPython
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+HF DeepRL Course/ml-agents
+HF DeepRL Course/results
diff --git a/HF DeepRL Course/Unit1 - Intro.ipynb b/HF DeepRL Course/Unit1 - Intro.ipynb
new file mode 100644
index 0000000..408e52f
--- /dev/null
+++ b/HF DeepRL Course/Unit1 - Intro.ipynb	
@@ -0,0 +1,19 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"njb_ProuHiOe"},"source":["# Unit 1: Train your first Deep Reinforcement Learning Agent 🤖\n","\n","![Cover](https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit1/thumbnail.jpg)\n","\n","In this notebook, you'll train your **first Deep Reinforcement Learning agent**: a Lunar Lander agent that will learn to **land correctly on the Moon 🌕**. Using [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/), a Deep Reinforcement Learning library, you'll train the agent, share it with the community, and experiment with different configurations.\n","\n","⬇️ Here is an example of what **you will achieve in just a couple of minutes.** ⬇️\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PF46MwbZD00b"},"outputs":[],"source":["%%html\n","<video controls autoplay><source src=\"https://huggingface.co/sb3/ppo-LunarLander-v2/resolve/main/replay.mp4\" type=\"video/mp4\"></video>"]},{"cell_type":"markdown","source":["### The environment 🎮\n","\n","- [LunarLander-v2](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n","\n","### The library used 📚\n","\n","- [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/)"],"metadata":{"id":"x7oR6R-ZIbeS"}},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the Github Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"OwEcFHe9RRZW"}},{"cell_type":"markdown","metadata":{"id":"4i6tjI2tHQ8j"},"source":["## Objectives of this notebook 🏆\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Gymnasium**, the environment library.\n","- Be able to use **Stable-Baselines3**, the deep reinforcement learning library.\n","- Be able to **push your trained agent to the Hub** with a nice video replay and an evaluation score 
🔥.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from Deep Reinforcement Learning Course\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"],"metadata":{"id":"Ff-nyJdzJPND"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","- 🎓 **Earn a certificate of completion** by completing 80% of the assignments.\n","\n","And more!\n","\n","Check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","The best way to keep in touch and ask questions is **to join our discord server** to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites 🏗️\n","\n","Before diving into the notebook, you need to:\n","\n","🔲 📝 **[Read Unit 0](https://huggingface.co/deep-rl-course/unit0/introduction)** that gives you all the **information about the course and helps you to onboard** 🤗\n","\n","🔲 📚 **Develop an understanding of the foundations of Reinforcement learning** (MC, TD, Rewards hypothesis...) 
by [reading Unit 1](https://huggingface.co/deep-rl-course/unit1/introduction)."]},{"cell_type":"markdown","source":["## A small recap of Deep Reinforcement Learning 📚\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit1/RL_process_game.jpg\" alt=\"The RL process\" width=\"100%\">"],"metadata":{"id":"HoeqMnr5LuYE"}},{"cell_type":"markdown","metadata":{"id":"xcQYx9ynaFMD"},"source":["Let's do a small recap on what we learned in the first Unit:\n","\n","- Reinforcement Learning is a **computational approach to learning from actions**. We build an agent that learns from the environment by **interacting with it through trial and error** and receiving rewards (negative or positive) as feedback.\n","\n","- The goal of any RL agent is to **maximize its expected cumulative reward** (also called expected return) because RL is based on the _reward hypothesis_, which is that all goals can be described as the maximization of an expected cumulative reward.\n","\n","- The RL process is a **loop that outputs a sequence of state, action, reward, and next state**.\n","\n","- To calculate the expected cumulative reward (expected return), **we discount the rewards**: the rewards that come sooner (at the beginning of the game) are more probable to happen since they are more predictable than the long-term future reward.\n","\n","- To solve an RL problem, you want to **find an optimal policy**; the policy is the \"brain\" of your AI that will tell us what action to take given a state. 
The optimal one is the one that gives you the actions that max the expected return.\n","\n","There are **two** ways to find your optimal policy:\n","\n","- By **training your policy directly**: policy-based methods.\n","- By **training a value function** that tells us the expected return the agent will get at each state and use this function to define our policy: value-based methods.\n","\n","- Finally, we spoke about Deep RL because **we introduce deep neural networks to estimate the action to take (policy-based) or to estimate the value of a state (value-based) hence the name \"deep.\"**"]},{"cell_type":"markdown","source":["# Let's train our first Deep Reinforcement Learning agent and upload it to the Hub 🚀\n","\n","## Get a certificate 🎓\n","\n","To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push your trained model to the Hub and **get a result of >= 200**.\n","\n","To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"qDploC3jSH99"}},{"cell_type":"markdown","source":["## Set the GPU 💪\n","\n","- To **accelerate the agent's training, we'll use a GPU**. 
To do that, go to `Runtime > Change Runtime type`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"],"metadata":{"id":"HqzznTzhNfAC"}},{"cell_type":"markdown","metadata":{"id":"38HBd3t1SHJ8"},"source":["- `Hardware Accelerator > GPU`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"]},{"cell_type":"markdown","metadata":{"id":"jeDAH0h0EBiG"},"source":["## Install dependencies and create a virtual screen 🔽\n","\n","The first step is to install the dependencies, we’ll install multiple ones.\n","\n","- `gymnasium[box2d]`: Contains the LunarLander-v2 environment 🌛\n","- `stable-baselines3[extra]`: The deep reinforcement learning library.\n","- `huggingface_sb3`: Additional code for Stable-baselines3 to load and upload models from the Hugging Face 🤗 Hub.\n","\n","To make things easier, we created a script to install all these dependencies."]},{"cell_type":"code","source":["!apt install swig cmake"],"metadata":{"id":"yQIGLPDkGhgG"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9XaULfDZDvrC"},"outputs":[],"source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt"]},{"cell_type":"markdown","source":["During the notebook, we'll need to generate a replay video. 
To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install virtual screen libraries and create and run a virtual screen 🖥"],"metadata":{"id":"BEKeXQJsQCYm"}},{"cell_type":"code","source":["!sudo apt-get update\n","!sudo apt-get install -y python3-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"],"metadata":{"id":"j5f2cGkdP-mb"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["To make sure the new installed libraries are used, **sometimes it's required to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to connect again and run the code starting from here**. Thanks to this trick, **we will be able to run our virtual screen.**"],"metadata":{"id":"TCwBTAwAW9JJ"}},{"cell_type":"code","source":["import os\n","os.kill(os.getpid(), 9)"],"metadata":{"id":"cYvkbef7XEMi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"BE5JWP5rQIKf"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wrgpVFqyENVf"},"source":["## Import the packages 📦\n","\n","One additional library we import is huggingface_hub **to be able to upload and download trained models from the hub**.\n","\n","\n","The Hugging Face Hub 🤗 works as a central place where anyone can share and explore models and datasets. 
It has versioning, metrics, visualizations and other features that will allow you to easily collaborate with others.\n","\n","You can see here all the Deep reinforcement Learning models available here👉 https://huggingface.co/models?pipeline_tag=reinforcement-learning&sort=downloads\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cygWLPGsEQ0m"},"outputs":[],"source":["import gymnasium\n","\n","from huggingface_sb3 import load_from_hub, package_to_hub\n","from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n","\n","from stable_baselines3 import PPO\n","from stable_baselines3.common.env_util import make_vec_env\n","from stable_baselines3.common.evaluation import evaluate_policy\n","from stable_baselines3.common.monitor import Monitor"]},{"cell_type":"markdown","metadata":{"id":"MRqRuRUl8CsB"},"source":["## Understand Gymnasium and how it works 🤖\n","\n","🏋 The library containing our environment is called Gymnasium.\n","**You'll use Gymnasium a lot in Deep Reinforcement Learning.**\n","\n","Gymnasium is the **new version of Gym library** [maintained by the Farama Foundation](https://farama.org/).\n","\n","The Gymnasium library provides two things:\n","\n","- An interface that allows you to **create RL environments**.\n","- A **collection of environments** (gym-control, atari, box2D...).\n","\n","Let's look at an example, but first let's recall the RL loop.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit1/RL_process_game.jpg\" alt=\"The RL process\" width=\"100%\">"]},{"cell_type":"markdown","metadata":{"id":"-TzNN0bQ_j-3"},"source":["At each step:\n","- Our Agent receives a **state (S0)** from the **Environment** — we receive the first frame of our game (Environment).\n","- Based on that **state (S0),** the Agent takes an **action (A0)** — our Agent will move to the right.\n","- The environment transitions to a 
**new** **state (S1)** — new frame.\n","- The environment gives some **reward (R1)** to the Agent — we’re not dead *(Positive Reward +1)*.\n","\n","\n","With Gymnasium:\n","\n","1️⃣ We create our environment using `gymnasium.make()`\n","\n","2️⃣ We reset the environment to its initial state with `observation = env.reset()`\n","\n","At each step:\n","\n","3️⃣ Get an action using our model (in our example we take a random action)\n","\n","4️⃣ Using `env.step(action)`, we perform this action in the environment and get\n","- `observation`: The new state (st+1)\n","- `reward`: The reward we get after executing the action\n","- `terminated`: Indicates if the episode terminated (agent reach the terminal state)\n","- `truncated`: Introduced with this new version, it indicates a timelimit or if an agent go out of bounds of the environment for instance.\n","- `info`: A dictionary that provides additional information (depends on the environment).\n","\n","For more explanations check this 👉 https://gymnasium.farama.org/api/env/#gymnasium.Env.step\n","\n","If the episode is terminated:\n","- We reset the environment to its initial state with `observation = env.reset()`\n","\n","**Let's look at an example!** Make sure to read the code\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"w7vOFlpA_ONz"},"outputs":[],"source":["import gymnasium as gym\n","\n","# First, we create our environment called LunarLander-v2\n","env = gym.make(\"LunarLander-v2\")\n","\n","# Then we reset this environment\n","observation, info = env.reset()\n","\n","for _ in range(20):\n","  # Take a random action\n","  action = env.action_space.sample()\n","  print(\"Action taken:\", action)\n","\n","  # Do this action in the environment and get\n","  # next_state, reward, terminated, truncated and info\n","  observation, reward, terminated, truncated, info = env.step(action)\n","\n","  # If the game is terminated (in our case we land, crashed) or truncated (timeout)\n","  if terminated or 
truncated:\n","      # Reset the environment\n","      print(\"Environment is reset\")\n","      observation, info = env.reset()\n","\n","env.close()"]},{"cell_type":"markdown","metadata":{"id":"XIrKGGSlENZB"},"source":["## Create the LunarLander environment 🌛 and understand how it works\n","\n","### [The environment 🎮](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n","\n","In this first tutorial, we’re going to train our agent, a [Lunar Lander](https://gymnasium.farama.org/environments/box2d/lunar_lander/), **to land correctly on the moon**. To do that, the agent needs to learn **to adapt its speed and position (horizontal, vertical, and angular) to land correctly.**\n","\n","---\n","\n","\n","💡 A good habit when you start to use an environment is to check its documentation\n","\n","👉 https://gymnasium.farama.org/environments/box2d/lunar_lander/\n","\n","---\n"]},{"cell_type":"markdown","metadata":{"id":"poLBgRocF9aT"},"source":["Let's see what the Environment looks like:\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZNPG0g_UGCfh"},"outputs":[],"source":["# We create our environment with gym.make(\"<name_of_the_environment>\")\n","env = gym.make(\"LunarLander-v2\")\n","env.reset()\n","print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"Observation Space Shape\", env.observation_space.shape)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"]},{"cell_type":"markdown","metadata":{"id":"2MXc15qFE0M9"},"source":["We see with `Observation Space Shape (8,)` that the observation is a vector of size 8, where each value contains different information about the lander:\n","- Horizontal pad coordinate (x)\n","- Vertical pad coordinate (y)\n","- Horizontal speed (x)\n","- Vertical speed (y)\n","- Angle\n","- Angular speed\n","- If the left leg contact point has touched the land (boolean)\n","- If the right leg contact point has touched the land 
(boolean)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"We5WqOBGLoSm"},"outputs":[],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"Action Space Shape\", env.action_space.n)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"MyxXwkI2Magx"},"source":["The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n","\n","- Action 0: Do nothing,\n","- Action 1: Fire left orientation engine,\n","- Action 2: Fire the main engine,\n","- Action 3: Fire right orientation engine.\n","\n","Reward function (the function that gives a reward at each timestep) 💰:\n","\n","After every step a reward is granted. The total reward of an episode is the **sum of the rewards for all the steps within that episode**.\n","\n","For each step, the reward:\n","\n","- Is increased/decreased the closer/further the lander is to the landing pad.\n","- Is increased/decreased the slower/faster the lander is moving.\n","- Is decreased the more the lander is tilted (angle not horizontal).\n","- Is increased by 10 points for each leg that is in contact with the ground.\n","- Is decreased by 0.03 points each frame a side engine is firing.\n","- Is decreased by 0.3 points each frame the main engine is firing.\n","\n","The episode receives an **additional reward of -100 or +100 points for crashing or landing safely respectively.**\n","\n","An episode is **considered a solution if it scores at least 200 points.**"]},{"cell_type":"markdown","metadata":{"id":"dFD9RAFjG8aq"},"source":["#### Vectorized Environment\n","\n","- We create a vectorized environment (a method for stacking multiple independent environments into a single environment) of 16 environments, this way, **we'll have more diverse experiences during the training.**"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"99hqQ_etEy1N"},"outputs":[],"source":["# Create the 
environment\n","env = make_vec_env('LunarLander-v2', n_envs=16)"]},{"cell_type":"markdown","metadata":{"id":"VgrE86r5E5IK"},"source":["## Create the Model 🤖\n","- We have studied our environment and we understood the problem: **being able to land the Lunar Lander to the Landing Pad correctly by controlling left, right and main orientation engine**. Now let's build the algorithm we're going to use to solve this Problem 🚀.\n","\n","- To do so, we're going to use our first Deep RL library, [Stable Baselines3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/).\n","\n","- SB3 is a set of **reliable implementations of reinforcement learning algorithms in PyTorch**.\n","\n","---\n","\n","💡 A good habit when using a new library is to dive first on the documentation: https://stable-baselines3.readthedocs.io/en/master/ and then try some tutorials.\n","\n","----"]},{"cell_type":"markdown","source":["<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit1/sb3.png\" alt=\"Stable Baselines3\">"],"metadata":{"id":"HLlClRW37Q7e"}},{"cell_type":"markdown","metadata":{"id":"HV4yiUM_9_Ka"},"source":["To solve this problem, we're going to use SB3 **PPO**. 
[PPO (aka Proximal Policy Optimization) is one of the SOTA (state of the art) Deep Reinforcement Learning algorithms that you'll study during this course](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#example%5D).\n","\n","PPO is a combination of:\n","- *Value-based reinforcement learning method*: learning an action-value function that will tell us the **most valuable action to take given a state and action**.\n","- *Policy-based reinforcement learning method*: learning a policy that will **give us a probability distribution over actions**."]},{"cell_type":"markdown","metadata":{"id":"5qL_4HeIOrEJ"},"source":["Stable-Baselines3 is easy to set up:\n","\n","1️⃣ You **create your environment** (in our case it was done above)\n","\n","2️⃣ You define the **model you want to use and instantiate this model** `model = PPO(\"MlpPolicy\")`\n","\n","3️⃣ You **train the agent** with `model.learn` and define the number of training timesteps\n","\n","```\n","# Create environment\n","env = gym.make('LunarLander-v2')\n","\n","# Instantiate the agent\n","model = PPO('MlpPolicy', env, verbose=1)\n","# Train the agent\n","model.learn(total_timesteps=int(2e5))\n","```\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"nxI6hT1GE4-A"},"outputs":[],"source":["# TODO: Define a PPO MlpPolicy architecture\n","# We use MultiLayerPerceptron (MLPPolicy) because the input is a vector,\n","# if we had frames as input we would use CnnPolicy\n","model ="]},{"cell_type":"markdown","metadata":{"id":"QAN7B0_HCVZC"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"543OHYDfcjK4"},"outputs":[],"source":["# SOLUTION\n","# We added some parameters to accelerate the training\n","model = PPO(\n","    policy = 'MlpPolicy',\n","    env = env,\n","    n_steps = 1024,\n","    batch_size = 64,\n","    n_epochs = 4,\n","    gamma = 0.999,\n","    gae_lambda = 0.98,\n","    ent_coef = 0.01,\n","    
verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"ClJJk88yoBUi"},"source":["## Train the PPO agent 🏃\n","- Let's train our agent for 1,000,000 timesteps, don't forget to use GPU on Colab. It will take approximately ~20min, but you can use fewer timesteps if you just want to try it out.\n","- During the training, take a ☕ break you deserved it 🤗"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"qKnYkNiVp89p"},"outputs":[],"source":["# TODO: Train it for 1,000,000 timesteps\n","\n","# TODO: Specify file name for model and save the model to file\n","model_name = \"\"\n"]},{"cell_type":"markdown","metadata":{"id":"1bQzQ-QcE3zo"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"poBCy9u_csyR"},"outputs":[],"source":["# SOLUTION\n","# Train it for 1,000,000 timesteps\n","model.learn(total_timesteps=1000000)\n","# Save the model\n","model_name = \"ppo-LunarLander-v2\"\n","model.save(model_name)"]},{"cell_type":"markdown","metadata":{"id":"BY_HuedOoISR"},"source":["## Evaluate the agent 📈\n","- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n","- Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**.\n","- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n","- To fill that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n","- In the next step,  we'll see **how to automatically evaluate and share your agent to compete in a leaderboard, but for now let's do it ourselves**\n","\n","\n","💡 When you evaluate your agent, you should not use your training environment but create an evaluation environment."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yRpno0glsADy"},"outputs":[],"source":["# TODO: Evaluate the agent\n","# Create a new environment for evaluation\n","eval_env =\n","\n","# 
Evaluate the model with 10 evaluation episodes and deterministic=True\n","mean_reward, std_reward =\n","\n","# Print the results\n","\n"]},{"cell_type":"markdown","metadata":{"id":"BqPKw3jt_pG5"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zpz8kHlt_a_m"},"outputs":[],"source":["#@title\n","eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n","mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n","print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"]},{"cell_type":"markdown","metadata":{"id":"reBhoODwcXfr"},"source":["- In my case, I got a mean reward of `200.20 +/- 20.80` after training for 1 million steps, which means that our lunar lander agent is ready to land on the moon 🌛🥳."]},{"cell_type":"markdown","metadata":{"id":"IK_kR78NoNb2"},"source":["## Publish our trained model on the Hub 🔥\n","Now that we saw we got good results after the training, we can publish our trained model on the hub 🤗 with one line of code.\n","\n","📚 The library's documentation 👉 https://github.com/huggingface/huggingface_sb3/tree/main#hugging-face--x-stable-baselines3-v20\n","\n","Here's an example of a Model Card (with Space Invaders):"]},{"cell_type":"markdown","metadata":{"id":"Gs-Ew7e1gXN3"},"source":["By using `package_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"JquRrWytA6eo"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not 
already done) create an account on Hugging Face ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">\n","\n","- Copy the token\n","- Run the cell below and paste the token"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"GZiFBBlzxzxY"},"outputs":[],"source":["notebook_login()\n","!git config --global credential.helper store"]},{"cell_type":"markdown","metadata":{"id":"_tsf2uv0g_4p"},"source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"]},{"cell_type":"markdown","metadata":{"id":"FGNh9VsZok0i"},"source":["3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥 using `package_to_hub()` function"]},{"cell_type":"markdown","metadata":{"id":"Ay24l6bqFF18"},"source":["Let's fill the `package_to_hub` function:\n","- `model`: our trained model.\n","- `model_name`: the name of the trained model that we defined in `model_save`\n","- `model_architecture`: the model architecture we used, in our case PPO\n","- `env_id`: the name of the environment, in our case `LunarLander-v2`\n","- `eval_env`: the evaluation environment defined in eval_env\n","- `repo_id`: the name of the Hugging Face Hub Repository that will be created/updated `(repo_id = {username}/{repo_name})`\n","\n","💡 **A good name is {username}/{model_architecture}-{env_id}**\n","\n","- `commit_message`: message of the commit"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JPG7ofdGIHN8"},"outputs":[],"source":["import gymnasium as gym\n","from stable_baselines3.common.vec_env import DummyVecEnv\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from 
huggingface_sb3 import package_to_hub\n","\n","## TODO: Define a repo_id\n","## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","repo_id =\n","\n","# TODO: Define the name of the environment\n","env_id =\n","\n","# Create the evaluation env and set the render_mode=\"rgb_array\"\n","eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode=\"rgb_array\"))])\n","\n","\n","# TODO: Define the model architecture we used\n","model_architecture = \"\"\n","\n","## TODO: Define the commit message\n","commit_message = \"\"\n","\n","# method save, evaluate, generate a model card and record a replay video of your agent before pushing the repo to the hub\n","package_to_hub(model=model, # Our trained model\n","               model_name=model_name, # The name of our trained model\n","               model_architecture=model_architecture, # The model architecture we used: in our case PPO\n","               env_id=env_id, # Name of the environment\n","               eval_env=eval_env, # Evaluation Environment\n","               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","               commit_message=commit_message)"]},{"cell_type":"markdown","metadata":{"id":"Avf6gufJBGMw"},"source":["#### Solution\n"]},{"cell_type":"code","source":["import gymnasium as gym\n","\n","from stable_baselines3 import PPO\n","from stable_baselines3.common.vec_env import DummyVecEnv\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from huggingface_sb3 import package_to_hub\n","\n","# PLACE the variables you've just defined two cells above\n","# Define the name of the environment\n","env_id = \"LunarLander-v2\"\n","\n","# TODO: Define the model architecture we used\n","model_architecture = \"PPO\"\n","\n","## Define a repo_id\n","## repo_id is the id 
of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","## CHANGE WITH YOUR REPO ID\n","repo_id = \"ThomasSimonini/ppo-LunarLander-v2\" # Change with your repo id, you can't push with mine 😄\n","\n","## Define the commit message\n","commit_message = \"Upload PPO LunarLander-v2 trained agent\"\n","\n","# Create the evaluation env and set the render_mode=\"rgb_array\"\n","eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode=\"rgb_array\")])\n","\n","# PLACE the package_to_hub function you've just filled here\n","package_to_hub(model=model, # Our trained model\n","               model_name=model_name, # The name of our trained model\n","               model_architecture=model_architecture, # The model architecture we used: in our case PPO\n","               env_id=env_id, # Name of the environment\n","               eval_env=eval_env, # Evaluation Environment\n","               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","               commit_message=commit_message)\n"],"metadata":{"id":"I2E--IJu8JYq"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Congrats 🥳 you've just trained and uploaded your first Deep Reinforcement Learning agent. The script above should have displayed a link to a model repository such as https://huggingface.co/osanseviero/test_sb3. 
When you go to this link, you can:\n","* See a video preview of your agent at the right.\n","* Click \"Files and versions\" to see all the files in the repository.\n","* Click \"Use in stable-baselines3\" to get a code snippet that shows how to load the model.\n","* A model card (`README.md` file) which gives a description of the model\n","\n","Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent.\n","\n","Compare the results of your LunarLander-v2 with your classmates using the leaderboard 🏆 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"],"metadata":{"id":"T79AEAWEFIxz"}},{"cell_type":"markdown","metadata":{"id":"9nWnuQHRfFRa"},"source":["## Load a saved LunarLander model from the Hub 🤗\n","Thanks to [ironbar](https://github.com/ironbar) for the contribution.\n","\n","Loading a saved model from the Hub is really easy.\n","\n","You go to https://huggingface.co/models?library=stable-baselines3 to see the list of all the Stable-baselines3 saved models.\n","1. You select one and copy its repo_id\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit1/copy-id.png\" alt=\"Copy-id\"/>"]},{"cell_type":"markdown","metadata":{"id":"hNPLJF2bfiUw"},"source":["2. 
Then we just need to use load_from_hub with:\n","- The repo_id\n","- The filename: the saved model inside the repo and its extension (*.zip)"]},{"cell_type":"markdown","source":["Because the model I downloaded from the Hub was trained with Gym (the former version of Gymnasium), we need to install Shimmy, an API conversion tool that will help us run the environment correctly.\n","\n","Shimmy Documentation: https://github.com/Farama-Foundation/Shimmy"],"metadata":{"id":"bhb9-NtsinKB"}},{"cell_type":"code","source":["!pip install shimmy"],"metadata":{"id":"03WI-bkci1kH"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"oj8PSGHJfwz3"},"outputs":[],"source":["from huggingface_sb3 import load_from_hub\n","repo_id = \"Classroom-workshop/assignment2-omar\" # The repo_id\n","filename = \"ppo-LunarLander-v2.zip\" # The model filename.zip\n","\n","# When the model was trained on Python 3.8 the pickle protocol is 5\n","# But Python 3.6, 3.7 use protocol 4\n","# In order to get compatibility we need to:\n","# 1. Install pickle5 (we done it at the beginning of the colab)\n","# 2. 
Create a custom empty object we pass as parameter to PPO.load()\n","custom_objects = {\n","            \"learning_rate\": 0.0,\n","            \"lr_schedule\": lambda _: 0.0,\n","            \"clip_range\": lambda _: 0.0,\n","}\n","\n","checkpoint = load_from_hub(repo_id, filename)\n","model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)"]},{"cell_type":"markdown","metadata":{"id":"Fs0Y-qgPgLUf"},"source":["Let's evaluate this agent:"]},{"cell_type":"code","source":["#@title\n","eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n","mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n","print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"],"metadata":{"id":"PAEVwK-aahfx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BQAwLnYFPk-s"},"source":["## Some additional challenges 🏆\n","The best way to learn **is to try things on your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. With 1,000,000 steps, we saw some great results!\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n","\n","Here are some ideas to achieve this:\n","* Train more steps\n","* Try different hyperparameters for `PPO`. You can see them at https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters.\n","* Check the [Stable-Baselines3 documentation](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) and try another model such as DQN.\n","* **Push your new trained model** on the Hub 🔥\n","\n","**Compare the results of your LunarLander-v2 with your classmates** using the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) 🏆\n","\n","Is moon landing too boring for you? 
Try to **change the environment**, why not use MountainCar-v0, CartPole-v1 or CarRacing-v0? Check how they work [using the gym documentation](https://www.gymlibrary.dev/) and have fun 🎉."]},{"cell_type":"markdown","metadata":{"id":"9lM95-dvmif8"},"source":["________________________________________________________________________\n","Congrats on finishing this chapter! That was the biggest one, **and there was a lot of information.**\n","\n","If you still feel confused with all these elements...it's totally normal! **This was the same for me and for all people who studied RL.**\n","\n","Take time to really **grasp the material before continuing and try the additional challenges**. It’s important to master these elements and have a solid foundation.\n","\n","Naturally, during the course, we’re going to dive deeper into these concepts but **it’s better to have a good understanding of them now before diving into the next chapters.**\n","\n"]},{"cell_type":"markdown","metadata":{"id":"BjLhT70TEZIn"},"source":["Next time, in the bonus unit 1, you'll train Huggy the Dog to fetch the stick.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit1/huggy.jpg\" alt=\"Huggy\"/>\n","\n","## Keep learning, stay awesome 🤗"]}], "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },"nbformat":4,"nbformat_minor":0}
diff --git a/HF DeepRL Course/Unit2 - Q-Learning.ipynb b/HF DeepRL Course/Unit2 - Q-Learning.ipynb
new file mode 100644
index 0000000..98e83fa
--- /dev/null
+++ b/HF DeepRL Course/Unit2 - Q-Learning.ipynb	
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"njb_ProuHiOe"},"source":["# Unit 2: Q-Learning with FrozenLake-v1 ⛄ and Taxi-v3 🚕\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/thumbnail.jpg\" alt=\"Unit 2 Thumbnail\">\n","\n","In this notebook, **you'll code your first Reinforcement Learning agent from scratch** to play FrozenLake ❄️ using Q-Learning, share it with the community, and experiment with different configurations.\n","\n","⬇️ Here is an example of what **you will achieve in just a couple of minutes.** ⬇️\n"]},{"cell_type":"markdown","metadata":{"id":"vRU_vXBrl1Jx"},"source":["<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/envs.gif\" alt=\"Environments\"/>"]},{"cell_type":"markdown","source":["###🎮 Environments:\n","\n","- [FrozenLake-v1](https://gymnasium.farama.org/environments/toy_text/frozen_lake/)\n","- [Taxi-v3](https://gymnasium.farama.org/environments/toy_text/taxi/)\n","\n","###📚 RL-Library:\n","\n","- Python and NumPy\n","- [Gymnasium](https://gymnasium.farama.org/)\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"DPTBOv9HYLZ2"}},{"cell_type":"markdown","metadata":{"id":"4i6tjI2tHQ8j"},"source":["## Objectives of this notebook 🏆\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Gymnasium**, the environment library.\n","- Be able to code a Q-Learning agent from scratch.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score 🔥.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from the Deep Reinforcement Learning Course\n","\n","<img 
src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"],"metadata":{"id":"viNzVbVaYvY3"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites 🏗️\n","\n","Before diving into the notebook, you need to:\n","\n","🔲 📚 **Study [Q-Learning by reading Unit 2](https://huggingface.co/deep-rl-course/unit2/introduction)**  🤗  "]},{"cell_type":"markdown","metadata":{"id":"f2ONOODsyrMU"},"source":["## A small recap of Q-Learning"]},{"cell_type":"markdown","metadata":{"id":"V68VveLacfxJ"},"source":["*Q-Learning* **is the RL algorithm that**:\n","\n","- Trains *Q-Function*, an **action-value function** that encoded, in internal memory, by a *Q-table* **that contains all the state-action pair values.**\n","\n","- Given a state and action, our Q-Function **will search the Q-table for the corresponding value.**\n","    \n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/Q-function-2.jpg\" alt=\"Q function\"  width=\"100%\"/>\n","\n","- When the 
training is done, **we have an optimal Q-Function, so an optimal Q-Table.**\n","    \n","- And if we **have an optimal Q-function**, we\n","have an optimal policy, since we **know, for each state, the best action to take.**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/link-value-policy.jpg\" alt=\"Link value policy\"  width=\"100%\"/>\n","\n","\n","But, in the beginning, our **Q-Table is useless since it gives arbitrary values for each state-action pair (most of the time we initialize the Q-Table to 0 values)**. But as we explore the environment and update our Q-Table, it will give us better and better approximations.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/q-learning.jpeg\" alt=\"q-learning.jpeg\" width=\"100%\"/>\n","\n","This is the Q-Learning pseudocode:\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/Q-learning-2.jpg\" alt=\"Q-Learning\" width=\"100%\"/>\n"]},{"cell_type":"markdown","source":["# Let's code our first Reinforcement Learning algorithm 🚀"],"metadata":{"id":"HEtx8Y8MqKfH"}},{"cell_type":"markdown","source":["To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push your trained Taxi model to the Hub and **get a result of >= 4.5**.\n","\n","To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"Kdxb1IhzTn0v"}},{"cell_type":"markdown","source":["## Install dependencies and create a virtual display 
🔽\n","\n","In the notebook, we'll need to generate a replay video. To do so, with Colab, **we need to have a virtual screen to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen 🖥\n","\n","We’ll install multiple ones:\n","\n","- `gymnasium`: Contains the FrozenLake-v1 ⛄ and Taxi-v3 🚕 environments.\n","- `pygame`: Used for the FrozenLake-v1 and Taxi-v3 UI.\n","- `numpy`: Used for handling our Q-table.\n","\n","The Hugging Face Hub 🤗 works as a central place where anyone can share and explore models and datasets. It has versioning, metrics, visualizations and other features that will allow you to easily collaborate with others.\n","\n","You can see here all the Deep RL models available (if they use Q Learning) here 👉 https://huggingface.co/models?other=q-learning"],"metadata":{"id":"4gpxC1_kqUYe"}},{"cell_type":"code","execution_count":25,"metadata":{"id":"9XaULfDZDvrC","executionInfo":{"status":"ok","timestamp":1696092114072,"user_tz":-60,"elapsed":7348,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit2/requirements-unit2.txt"]},{"cell_type":"code","source":["!sudo apt-get update\n","!sudo apt-get install -y python3-opengl\n","!apt install ffmpeg xvfb\n","!pip3 install pyvirtualdisplay"],"metadata":{"id":"n71uTX7qqzz2","executionInfo":{"status":"ok","timestamp":1696092131729,"user_tz":-60,"elapsed":17662,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":26,"outputs":[]},{"cell_type":"markdown","source":["To make sure the new installed libraries are used, **sometimes it's required to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to connect again and run the code starting from here**. 
Thanks to this trick, **we will be able to run our virtual screen.**"],"metadata":{"id":"K6XC13pTfFiD"}},{"cell_type":"code","source":["import os\n","os.kill(os.getpid(), 9)"],"metadata":{"id":"3kuZbWAkfHdg"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"DaY1N4dBrabi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092139881,"user_tz":-60,"elapsed":400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"e7e3ed15-9247-49da-d437-2bd4abcb4772"},"execution_count":1,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<pyvirtualdisplay.display.Display at 0x7866f3476a10>"]},"metadata":{},"execution_count":1}]},{"cell_type":"markdown","metadata":{"id":"W-7f-Swax_9x"},"source":["## Import the packages 📦\n","\n","In addition to the installed libraries, we also use:\n","\n","- `random`: To generate random numbers (that will be useful for epsilon-greedy policy).\n","- `imageio`: To generate a replay video."]},{"cell_type":"code","execution_count":2,"metadata":{"id":"VcNvOAQlysBJ","executionInfo":{"status":"ok","timestamp":1696092143580,"user_tz":-60,"elapsed":485,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["import numpy as np\n","import gymnasium as gym\n","import random\n","import imageio\n","import os\n","import tqdm\n","\n","import pickle5 as pickle\n","from tqdm.notebook import tqdm"]},{"cell_type":"markdown","metadata":{"id":"xp4-bXKIy1mQ"},"source":["We're now ready to code our Q-Learning algorithm 🔥"]},{"cell_type":"markdown","metadata":{"id":"xya49aNJWVvv"},"source":["# Part 1: Frozen Lake ⛄ (non slippery version)"]},{"cell_type":"markdown","metadata":{"id":"NAvihuHdy9tw"},"source":["## Create and understand [FrozenLake environment 
⛄](https://gymnasium.farama.org/environments/toy_text/frozen_lake/)\n","---\n","\n","💡 A good habit when you start to use an environment is to check its documentation\n","\n","👉 https://gymnasium.farama.org/environments/toy_text/frozen_lake/\n","\n","---\n","\n","We're going to train our Q-Learning agent **to navigate from the starting state (S) to the goal state (G) by walking only on frozen tiles (F) and avoiding holes (H)**.\n","\n","We can have two sizes of environment:\n","\n","- `map_name=\"4x4\"`: a 4x4 grid version\n","- `map_name=\"8x8\"`: an 8x8 grid version\n","\n","\n","The environment has two modes:\n","\n","- `is_slippery=False`: The agent always moves **in the intended direction** due to the non-slippery nature of the frozen lake (deterministic).\n","- `is_slippery=True`: The agent **may not always move in the intended direction** due to the slippery nature of the frozen lake (stochastic)."]},{"cell_type":"markdown","metadata":{"id":"UaW_LHfS0PY2"},"source":["For now let's keep it simple with the 4x4 map and non-slippery.\n","We add a parameter called `render_mode` that specifies how the environment should be visualised. In our case because we **want to record a video of the environment at the end, we need to set render_mode to rgb_array**.\n","\n","As [explained in the documentation](https://gymnasium.farama.org/api/env/#gymnasium.Env.render) “rgb_array”: Return a single frame representing the current state of the environment. 
A frame is a np.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IzJnb8O3y8up"},"outputs":[],"source":["# Create the FrozenLake-v1 environment using 4x4 map and non-slippery version and render_mode=\"rgb_array\"\n","env = gym.make(\"FrozenLake-v1\", map_name=\"4x4\", is_slippery=False, render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"Ji_UrI5l2zzn"},"source":["### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jNxUbPMP0akP"},"outputs":[],"source":["env = gym.make(\"FrozenLake-v1\", map_name=\"4x4\", is_slippery=False, render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"KASNViqL4tZn"},"source":["You can create your own custom grid like this:\n","\n","```python\n","desc=[\"SFFF\", \"FHFH\", \"FFFH\", \"HFFG\"]\n","gym.make('FrozenLake-v1', desc=desc, is_slippery=True)\n","```\n","\n","but we'll use the default environment for now."]},{"cell_type":"markdown","metadata":{"id":"SXbTfdeJ1Xi9"},"source":["### Let's see what the Environment looks like:\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZNPG0g_UGCfh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079802454,"user_tz":-60,"elapsed":209,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"6e22a837-f2de-4e30-924d-851cbac15f7d"},"outputs":[{"output_type":"stream","name":"stdout","text":["_____OBSERVATION SPACE_____ \n","\n","Observation Space Discrete(16)\n","Sample observation 6\n"]}],"source":["# We create our environment with gym.make(\"<name_of_the_environment>\")- `is_slippery=False`: The agent always moves in the intended direction due to the non-slippery nature of the frozen lake (deterministic).\n","print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"Observation Space\", env.observation_space)\n","print(\"Sample observation\", 
env.observation_space.sample()) # Get a random observation"]},{"cell_type":"markdown","metadata":{"id":"2MXc15qFE0M9"},"source":["We see with `Observation Space Shape Discrete(16)` that the observation is an integer representing the **agent’s current position as current_row * ncols + current_col (where both the row and col start at 0)**.\n","\n","For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15. The number of possible observations is dependent on the size of the map. **For example, the 4x4 map has 16 possible observations.**\n","\n","\n","For instance, this is what state = 0 looks like:\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/frozenlake.png\" alt=\"FrozenLake\">"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"We5WqOBGLoSm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079806433,"user_tz":-60,"elapsed":209,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"9eee93fb-dee1-47a0-a646-f1f3fe0aebba"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n"," _____ACTION SPACE_____ \n","\n","Action Space Shape 4\n","Action Space Sample 1\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"Action Space Shape\", env.action_space.n)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"MyxXwkI2Magx"},"source":["The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n","- 0: GO LEFT\n","- 1: GO DOWN\n","- 2: GO RIGHT\n","- 3: GO UP\n","\n","Reward function 💰:\n","- Reach goal: +1\n","- Reach hole: 0\n","- Reach frozen: 0"]},{"cell_type":"markdown","metadata":{"id":"1pFhWblk3Awr"},"source":["## Create and Initialize the Q-table 🗄️\n","\n","(👀 Step 1 of the pseudocode)\n","\n","<img 
src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/Q-learning-2.jpg\" alt=\"Q-Learning\" width=\"100%\"/>\n","\n","\n","It's time to initialize our Q-table! To know how many rows (states) and columns (actions) to use, we need to know the action and observation space. We already know their values from before, but we'll want to obtain them programmatically so that our algorithm generalizes for different environments. Gym provides us a way to do that: `env.action_space.n` and `env.observation_space.n`\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"y3ZCdluj3k0l","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079808848,"user_tz":-60,"elapsed":229,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"a6729547-3ea2-4848-a685-fabdf79f243f"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are  16  possible states\n","There are  4  possible actions\n"]}],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")\n","\n","action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"rCddoOXM3UQH","executionInfo":{"status":"ok","timestamp":1696092287710,"user_tz":-60,"elapsed":395,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Let's create our Qtable of size (state_space, action_space) and initialized each values at 0 using np.zeros. 
np.zeros needs a tuple (a,b)\n","def initialize_q_table(state_space, action_space):\n","  Qtable = np.zeros((state_space, action_space))\n","  return Qtable"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9YfvrqRt3jdR"},"outputs":[],"source":["Qtable_frozenlake = initialize_q_table(state_space, action_space)"]},{"cell_type":"markdown","metadata":{"id":"67OdoKL63eDD"},"source":["### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"HuTKv3th3ohG"},"outputs":[],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")\n","\n","action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lnrb_nX33fJo"},"outputs":[],"source":["# Let's create our Qtable of size (state_space, action_space) and initialized each values at 0 using np.zeros\n","def initialize_q_table(state_space, action_space):\n","  Qtable = np.zeros((state_space, action_space))\n","  return Qtable"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Y0WlgkVO3Jf9"},"outputs":[],"source":["Qtable_frozenlake = initialize_q_table(state_space, action_space)"]},{"cell_type":"markdown","metadata":{"id":"Atll4Z774gri"},"source":["## Define the greedy policy 🤖\n","\n","Remember we have two policies since Q-Learning is an **off-policy** algorithm. This means we're using a **different policy for acting and updating the value function**.\n","\n","- Epsilon-greedy policy (acting policy)\n","- Greedy-policy (updating policy)\n","\n","The greedy policy will also be the final policy we'll have when the Q-learning agent completes training. 
The greedy policy is used to select an action using the Q-table.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/off-on-4.jpg\" alt=\"Q-Learning\" width=\"100%\"/>\n"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"E3SCLmLX5bWG","executionInfo":{"status":"ok","timestamp":1696092291068,"user_tz":-60,"elapsed":277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def greedy_policy(Qtable, state):\n","  # Exploitation: take the action with the highest state, action value\n","  action = np.argmax(Qtable[state][:])\n","\n","  return action"]},{"cell_type":"markdown","metadata":{"id":"B2_-8b8z5k54"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"se2OzWGW5kYJ"},"outputs":[],"source":["def greedy_policy(Qtable, state):\n","  # Exploitation: take the action with the highest state, action value\n","  action = np.argmax(Qtable[state][:])\n","\n","  return action"]},{"cell_type":"markdown","metadata":{"id":"flILKhBU3yZ7"},"source":["## Define the epsilon-greedy policy 🤖\n","\n","Epsilon-greedy is the training policy that handles the exploration/exploitation trade-off.\n","\n","The idea with epsilon-greedy:\n","\n","- With *probability 1 - ɛ* : **we do exploitation** (i.e. 
our agent selects the action with the highest state-action pair value).\n","\n","- With *probability ɛ*: we do **exploration** (trying a random action).\n","\n","As the training continues, we progressively **reduce the epsilon value since we will need less and less exploration and more exploitation.**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/Q-learning-4.jpg\" alt=\"Q-Learning\" width=\"100%\"/>\n"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"6Bj7x3in3_Pq","executionInfo":{"status":"ok","timestamp":1696092293376,"user_tz":-60,"elapsed":294,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def epsilon_greedy_policy(Qtable, state, epsilon):\n","  # Randomly generate a number between 0 and 1\n","  random_num = random.uniform(0, 1)\n","  # if random_num > greater than epsilon --> exploitation\n","  if random_num > epsilon:\n","    # Take the action with the highest value given a state\n","    # np.argmax can be useful here\n","    action = greedy_policy(Qtable, state)\n","  # else --> exploration\n","  else:\n","    action = env.action_space.sample() # Take a random action\n","\n","  return action"]},{"cell_type":"markdown","metadata":{"id":"8R5ej1fS4P2V"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cYxHuckr4LiG"},"outputs":[],"source":["def epsilon_greedy_policy(Qtable, state, epsilon):\n","  # Randomly generate a number between 0 and 1\n","  random_num = random.uniform(0,1)\n","  # if random_num > greater than epsilon --> exploitation\n","  if random_num > epsilon:\n","    # Take the action with the highest value given a state\n","    # np.argmax can be useful here\n","    action = greedy_policy(Qtable, state)\n","  # else --> exploration\n","  else:\n","    action = env.action_space.sample()\n","\n","  return action"]},{"cell_type":"markdown","metadata":{"id":"hW80DealcRtu"},"source":["## 
Define the hyperparameters ⚙️\n","\n","The exploration related hyperparameters are some of the most important ones.\n","\n","- We need to make sure that our agent **explores enough of the state space** to learn a good value approximation. To do that, we need to have progressive decay of the epsilon.\n","- If you decrease epsilon too fast (too high decay_rate), **you take the risk that your agent will be stuck**, since your agent didn't explore enough of the state space and hence can't solve the problem."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Y1tWn0tycWZ1"},"outputs":[],"source":["# Training parameters\n","n_training_episodes = 10000  # Total training episodes\n","learning_rate = 0.7          # Learning rate\n","\n","# Evaluation parameters\n","n_eval_episodes = 100        # Total number of test episodes\n","\n","# Environment parameters\n","env_id = \"FrozenLake-v1\"     # Name of the environment\n","max_steps = 99               # Max steps per episode\n","gamma = 0.95                 # Discounting rate\n","eval_seed = []               # The evaluation seed of the environment\n","\n","# Exploration parameters\n","max_epsilon = 1.0             # Exploration probability at start\n","min_epsilon = 0.05            # Minimum exploration probability\n","decay_rate = 0.0002           # Exponential decay rate for exploration prob"]},{"cell_type":"markdown","metadata":{"id":"cDb7Tdx8atfL"},"source":["## Create the training loop method\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit3/Q-learning-2.jpg\" alt=\"Q-Learning\" width=\"100%\"/>\n","\n","The training loop goes like this:\n","\n","```\n","For episode in the total of training episodes:\n","\n","Reduce epsilon (since we need less and less exploration)\n","Reset the environment\n","\n","  For step in max timesteps:    \n","    Choose the action At using epsilon greedy policy\n","    Take the action (a) and observe the outcome 
state(s') and reward (r)\n","    Update the Q-value Q(s,a) using Bellman equation Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n","    If done, finish the episode\n","    Our next state is the new state\n","```"]},{"cell_type":"code","execution_count":32,"metadata":{"id":"paOynXy3aoJW","executionInfo":{"status":"ok","timestamp":1696092879553,"user_tz":-60,"elapsed":300,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, lr):\n","  for episode in tqdm(range(n_training_episodes)):\n","    # Reduce epsilon (because we need less and less exploration)\n","    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n","    # Reset the environment\n","    state, info = env.reset()\n","    step = 0\n","    terminated = False\n","    truncated = False\n","\n","    # repeat\n","    for step in range(max_steps):\n","      # Choose the action At using epsilon greedy policy\n","      action = epsilon_greedy_policy(Qtable, state, epsilon)\n","\n","      # Take action At and observe Rt+1 and St+1\n","      # Take the action (a) and observe the outcome state(s') and reward (r)\n","      new_state, reward, terminated, truncated, info = env.step(action)\n","\n","      # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n","      Qtable[state][action] = Qtable[state][action] + lr * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])\n","\n","      # If terminated or truncated finish the episode\n","      if terminated or truncated:\n","        break\n","\n","      # Our next state is the new state\n","      state = new_state\n","  return Qtable"]},{"cell_type":"markdown","metadata":{"id":"Pnpk2ePoem3r"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IyZaYbUAeolw"},"outputs":[],"source":["def train(n_training_episodes, min_epsilon, 
max_epsilon, decay_rate, env, max_steps, Qtable, lr=None):\n","  \"\"\"\n","  Train the Q-table with Q-Learning, acting with an epsilon-greedy policy whose epsilon decays exponentially.\n","  The discount factor is read from the global ``gamma``.\n","  :param n_training_episodes: Number of training episodes\n","  :param min_epsilon: Lower bound of the exploration probability\n","  :param max_epsilon: Exploration probability at episode 0\n","  :param decay_rate: Exponential decay rate of epsilon, applied per episode\n","  :param env: The training environment\n","  :param max_steps: Maximum number of steps per episode\n","  :param Qtable: The Q-table; it is updated in place and returned\n","  :param lr: Learning rate; when omitted, the global ``learning_rate`` is used\n","  \"\"\"\n","  # Accept the learning rate as an optional 8th argument so that the\n","  # train(..., Qtable, learning_rate) calls of this notebook work with this definition too\n","  if lr is None:\n","    lr = learning_rate\n","  for episode in tqdm(range(n_training_episodes)):\n","    # Reduce epsilon (because we need less and less exploration)\n","    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n","    # Reset the environment\n","    state, info = env.reset()\n","    step = 0\n","    terminated = False\n","    truncated = False\n","\n","    # repeat\n","    for step in range(max_steps):\n","      # Choose the action At using epsilon greedy policy\n","      action = epsilon_greedy_policy(Qtable, state, epsilon)\n","\n","      # Take action At and observe Rt+1 and St+1\n","      # Take the action (a) and observe the outcome state(s') and reward (r)\n","      new_state, reward, terminated, truncated, info = env.step(action)\n","\n","      # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n","      Qtable[state][action] = Qtable[state][action] + lr * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])\n","\n","      # If terminated or truncated finish the episode\n","      if terminated or truncated:\n","        break\n","\n","      # Our next state is the new state\n","      state = new_state\n","  return Qtable"]},{"cell_type":"markdown","metadata":{"id":"WLwKQ4tUdhGI"},"source":["## Train the Q-Learning agent 
🏃"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"DPBxfjJdTCOH","colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["e6f0cdea90a54a409d1b6bb5da149fa2","1c2b811eaf5c46bdbae02a2c61a51ffd","840f4495c39f4dfca4ae71602ccd41e9","3de082bd7cbd40b9be4aec0e684becba","9833920632dd4f988b6f18e8caccbc5d","828041fe06b74a6a9ea122ff94a4e2a4","c0a0ecce9da741c6884684ac9224e388","41af6d560ff648b58032316a7367e001","8ac7eea521864e47bc77533e33a5990e","98ac2fcbdebd41e8a0465564c7f9f73c","fb6a4a28f32c417f88ae9b485051f39e"]},"executionInfo":{"status":"ok","timestamp":1696079839206,"user_tz":-60,"elapsed":3370,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"29414ef8-dc8f-4460-f557-2c9c540d13d2"},"outputs":[{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/10000 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e6f0cdea90a54a409d1b6bb5da149fa2"}},"metadata":{}}],"source":["Qtable_frozenlake = train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable_frozenlake, learning_rate)"]},{"cell_type":"markdown","metadata":{"id":"yVeEhUCrc30L"},"source":["## Let's see what our Q-Learning table looks like now 👀"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"nmfchsTITw4q","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079839206,"user_tz":-60,"elapsed":4,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"f4c7de13-790c-4d7d-82bb-7290475b37b3"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0.73509189, 0.77378094, 0.77378094, 0.73509189],\n","       [0.73509189, 0.        , 0.81450625, 0.77378094],\n","       [0.77378094, 0.857375  , 0.77378094, 0.81450625],\n","       [0.81450625, 0.        , 0.77378094, 0.77378094],\n","       [0.77378094, 0.81450625, 0.        , 0.73509189],\n","       [0.        
, 0.        , 0.        , 0.        ],\n","       [0.        , 0.9025    , 0.        , 0.81450625],\n","       [0.        , 0.        , 0.        , 0.        ],\n","       [0.81450625, 0.        , 0.857375  , 0.77378094],\n","       [0.81450625, 0.9025    , 0.9025    , 0.        ],\n","       [0.857375  , 0.95      , 0.        , 0.857375  ],\n","       [0.        , 0.        , 0.        , 0.        ],\n","       [0.        , 0.        , 0.        , 0.        ],\n","       [0.        , 0.9025    , 0.95      , 0.857375  ],\n","       [0.9025    , 0.95      , 1.        , 0.9025    ],\n","       [0.        , 0.        , 0.        , 0.        ]])"]},"metadata":{},"execution_count":14}],"source":["Qtable_frozenlake"]},{"cell_type":"markdown","metadata":{"id":"pUrWkxsHccXD"},"source":["## The evaluation method 📝\n","\n","- We defined the evaluation method that we're going to use to test our Q-Learning agent."]},{"cell_type":"code","execution_count":11,"metadata":{"id":"jNl0_JO2cbkm","executionInfo":{"status":"ok","timestamp":1696092302558,"user_tz":-60,"elapsed":275,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def evaluate_agent(env, max_steps, n_eval_episodes, Q, seed):\n","  \"\"\"\n","  Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n","  :param env: The evaluation environment\n","  :param max_steps: Maximum number of steps per episode\n","  :param n_eval_episodes: Number of episode to evaluate the agent\n","  :param Q: The Q-table\n","  :param seed: The evaluation seed array (for taxi-v3)\n","  \"\"\"\n","  episode_rewards = []\n","  for episode in tqdm(range(n_eval_episodes)):\n","    if seed:\n","      state, info = env.reset(seed=seed[episode])\n","    else:\n","      state, info = env.reset()\n","    step = 0\n","    truncated = False\n","    terminated = False\n","    total_rewards_ep = 0\n","\n","    for step in range(max_steps):\n","      # Take the action 
(index) that have the maximum expected future reward given that state\n","      action = greedy_policy(Q, state)\n","      new_state, reward, terminated, truncated, info = env.step(action)\n","      total_rewards_ep += reward\n","\n","      if terminated or truncated:\n","        break\n","      state = new_state\n","    episode_rewards.append(total_rewards_ep)\n","  mean_reward = np.mean(episode_rewards)\n","  std_reward = np.std(episode_rewards)\n","\n","  return mean_reward, std_reward"]},{"cell_type":"markdown","metadata":{"id":"0jJqjaoAnxUo"},"source":["## Evaluate our Q-Learning agent 📈\n","\n","- Usually, you should have a mean reward of 1.0\n","- The **environment is relatively easy** since the state space is really small (16). What you can try to do is [to replace it with the slippery version](https://gymnasium.farama.org/environments/toy_text/frozen_lake/), which introduces stochasticity, making the environment more complex."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"fAgB7s0HEFMm","colab":{"base_uri":"https://localhost:8080/","height":67,"referenced_widgets":["9288c30c30e14f8e964256563722f0f1","fdd5af398f844d6199f7be41e14b90d9","45712c72b8024eda976dfe6791af84d7","e0effc981c2d4baeb1a6cf6224b22fc4","04878d4e8ebf4d5d86fef0edc22b03e7","6bb9b7084bf74043914f4f4b6d35469f","e521204e0833433b825b3036d14d02ee","810a3f5035d648779b5550f82f64264c","0a231d4af7cb4fe29f2be2bce9a0eb8c","d70c52144c824b6ab1133576ed52c635","21ea34cbd4724403b10cff3a41461e06"]},"executionInfo":{"status":"ok","timestamp":1696079843334,"user_tz":-60,"elapsed":280,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"c80e0ada-8fb9-4079-cf28-03bb47261d0e"},"outputs":[{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, 
?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9288c30c30e14f8e964256563722f0f1"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Mean_reward=1.00 +/- 0.00\n"]}],"source":["# Evaluate our Agent\n","mean_reward, std_reward = evaluate_agent(env, max_steps, n_eval_episodes, Qtable_frozenlake, eval_seed)\n","print(f\"Mean_reward={mean_reward:.2f} +/- {std_reward:.2f}\")"]},{"cell_type":"markdown","metadata":{"id":"yxaP3bPdg1DV"},"source":["## Publish our trained model to the Hub 🔥\n","\n","Now that we saw good results after the training, **we can publish our trained model to the Hub 🤗 with one line of code**.\n","\n","Here's an example of a Model Card:\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/modelcard.png\" alt=\"Model card\" width=\"100%\"/>\n"]},{"cell_type":"markdown","metadata":{"id":"kv0k1JQjpMq3"},"source":["Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent."]},{"cell_type":"markdown","metadata":{"id":"QZ5LrR-joIHD"},"source":["#### Do not modify this code"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"Jex3i9lZ8ksX","executionInfo":{"status":"ok","timestamp":1696092305316,"user_tz":-60,"elapsed":664,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["from huggingface_hub import HfApi, snapshot_download\n","from huggingface_hub.repocard import metadata_eval_result, metadata_save\n","\n","from pathlib import Path\n","import datetime\n","import json"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"Qo57HBn3W74O","executionInfo":{"status":"ok","timestamp":1696092305970,"user_tz":-60,"elapsed":353,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def 
record_video(env, Qtable, out_directory, fps=1):\n","  \"\"\"\n","  Generate a replay video of the agent playing one episode greedily\n","  :param env: environment to play in; one frame is taken from env.render() after the reset and after every step\n","  :param Qtable: Qtable of our agent\n","  :param out_directory: path of the video file written by imageio.mimsave\n","  :param fps: how many frames per second (with taxi-v3 and frozenlake-v1 we use 1)\n","  \"\"\"\n","  images = []\n","  terminated = False\n","  truncated = False\n","  state, info = env.reset(seed=random.randint(0,500))\n","  img = env.render()\n","  images.append(img)\n","  # Stop as soon as the episode is terminated OR truncated. The parentheses matter:\n","  # `not terminated or truncated` means `(not terminated) or truncated`, which\n","  # never leaves the loop once an episode is truncated.\n","  while not (terminated or truncated):\n","    # Take the action (index) that have the maximum expected future reward given that state\n","    action = np.argmax(Qtable[state][:])\n","    state, reward, terminated, truncated, info = env.step(action) # We directly put next_state = state for recording logic\n","    img = env.render()\n","    images.append(img)\n","  imageio.mimsave(out_directory, [np.array(img) for img in images], fps=fps)"]},{"cell_type":"code","source":["def push_to_hub(\n","    repo_id, model, env, video_fps=1, local_repo_path=\"hub\"\n","):\n","    \"\"\"\n","    Evaluate, Generate a video and Upload a model to Hugging Face Hub.\n","    This method does the complete pipeline:\n","    - It evaluates the model\n","    - It generates the model card\n","    - It generates a replay video of the agent\n","    - It pushes everything to the Hub\n","\n","    :param repo_id: id of the model repository from the Hugging Face Hub ({username}/{repo_name})\n","    :param model: dictionary with the Q-table ('qtable') and the hyperparameters; 'env_id', 'max_steps',\n","    'n_eval_episodes' and 'eval_seed' are read here, 'map_name' and 'slippery' may be added,\n","    and the whole dictionary is pickled to q-learning.pkl\n","    :param env: environment used for the evaluation and for the replay video\n","    :param video_fps: how many frame per seconds to record our video replay\n","    (with taxi-v3 and frozenlake-v1 we use 1)\n","    :param local_repo_path: currently unused, the files are written to the folder returned by snapshot_download\n","    \"\"\"\n","    _, repo_name = repo_id.split(\"/\")\n","\n","    eval_env = env\n","    api = HfApi()\n","\n","    # Step 1: Create the repo\n","    repo_url = api.create_repo(\n","        repo_id=repo_id,\n","        exist_ok=True,\n","    )\n","\n","    # Step 2: Download files\n","    repo_local_path = 
Path(snapshot_download(repo_id=repo_id))\n","\n","    # Step 3: Save the model\n","    if env.spec.kwargs.get(\"map_name\"):\n","        model[\"map_name\"] = env.spec.kwargs.get(\"map_name\")\n","        if env.spec.kwargs.get(\"is_slippery\", \"\") == False:\n","            model[\"slippery\"] = False\n","\n","    # Pickle the model\n","    with open((repo_local_path) / \"q-learning.pkl\", \"wb\") as f:\n","        pickle.dump(model, f)\n","\n","    # Step 4: Evaluate the model and build JSON with evaluation metrics\n","    mean_reward, std_reward = evaluate_agent(\n","        eval_env, model[\"max_steps\"], model[\"n_eval_episodes\"], model[\"qtable\"], model[\"eval_seed\"]\n","    )\n","\n","    evaluate_data = {\n","        \"env_id\": model[\"env_id\"],\n","        \"mean_reward\": mean_reward,\n","        \"n_eval_episodes\": model[\"n_eval_episodes\"],\n","        \"eval_datetime\": datetime.datetime.now().isoformat()\n","    }\n","\n","    # Write a JSON file called \"results.json\" that will contain the\n","    # evaluation results\n","    with open(repo_local_path / \"results.json\", \"w\") as outfile:\n","        json.dump(evaluate_data, outfile)\n","\n","    # Step 5: Create the model card\n","    env_name = model[\"env_id\"]\n","    if env.spec.kwargs.get(\"map_name\"):\n","        env_name += \"-\" + env.spec.kwargs.get(\"map_name\")\n","\n","    if env.spec.kwargs.get(\"is_slippery\", \"\") == False:\n","        env_name += \"-\" + \"no_slippery\"\n","\n","    metadata = {}\n","    metadata[\"tags\"] = [env_name, \"q-learning\", \"reinforcement-learning\", \"custom-implementation\"]\n","\n","    # Add metrics (named eval_metadata so the builtin eval is not shadowed)\n","    eval_metadata = metadata_eval_result(\n","        model_pretty_name=repo_name,\n","        task_pretty_name=\"reinforcement-learning\",\n","        task_id=\"reinforcement-learning\",\n","        metrics_pretty_name=\"mean_reward\",\n","        metrics_id=\"mean_reward\",\n","        metrics_value=f\"{mean_reward:.2f} +/- 
{std_reward:.2f}\",\n","        dataset_pretty_name=env_name,\n","        dataset_id=env_name,\n","    )\n","\n","    # Merges both dictionaries\n","    metadata = {**metadata, **eval_metadata}\n","\n","    # The card names the environment stored in the model, not the notebook-level env_id global\n","    model_card = f\"\"\"\n","  # **Q-Learning** Agent playing **{model['env_id']}**\n","  This is a trained model of a **Q-Learning** agent playing **{model['env_id']}** .\n","\n","  ## Usage\n","\n","  ```python\n","\n","  model = load_from_hub(repo_id=\"{repo_id}\", filename=\"q-learning.pkl\")\n","\n","  # Don't forget to check if you need to add additional attributes (is_slippery=False etc)\n","  env = gym.make(model[\"env_id\"])\n","  ```\n","  \"\"\"\n","\n","    readme_path = repo_local_path / \"README.md\"\n","    readme = \"\"\n","    if readme_path.exists():\n","        with readme_path.open(\"r\", encoding=\"utf8\") as f:\n","            readme = f.read()\n","    else:\n","        readme = model_card\n","\n","    with readme_path.open(\"w\", encoding=\"utf-8\") as f:\n","        f.write(readme)\n","\n","    # Save our metrics to Readme metadata\n","    metadata_save(readme_path, metadata)\n","\n","    # Step 6: Record a video\n","    video_path = repo_local_path / \"replay.mp4\"\n","    record_video(env, model[\"qtable\"], video_path, video_fps)\n","\n","    # Step 7. Push everything to the Hub\n","    api.upload_folder(\n","        repo_id=repo_id,\n","        folder_path=repo_local_path,\n","        path_in_repo=\".\",\n","    )\n","\n","    print(\"Your model is pushed to the Hub. 
You can view your model here: \", repo_url)"],"metadata":{"id":"U4mdUTKkGnUd","executionInfo":{"status":"ok","timestamp":1696092307631,"user_tz":-60,"elapsed":432,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":14,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"81J6cet_ogSS"},"source":["### .\n","\n","By using `push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the Hub**.\n","\n","This way:\n","- You can **showcase our work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share an agent with the community that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"cWnFC0iZooTw"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account to HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF 
Token\">\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QB5nIcxR8paT","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["4119e481cf774e14a39e3480bf3dee23","c55f984de9d0425792199bb0204b2a88","5ae9e2d6dc10461f8d1f99b5ed460946","cd98d11981f34cdcb8209cbaaf74f61e","e9244f63c5f5486898935cd4ba84fed8","9496a0198bc64e3ea7180059fe35cb58","747b9c6d02e54bef8cc875a06ee0caae","166beba22ad74a7fbf21d101156b9f28","d9ad51d7a6524f9884c85b6ae14d7212","8591dbbbc0fe4a20ab143914dd849b4c","bde8da2b29d64d5cbc158a34697acb7b","cfa99abd6ac34994bd48b5919375c96b","ce25d48725e84931b5a1ad3f0c5cd057","ebc0a564f44c4428ae315638657f15de","79760d8954834265a6c7f637a5f5f57b","ee7230b508824636aa1de3aa9e547537","4dbdec8615eb43728988b1e92868f7bf","6fc271597c73420385a1011461269bb8","75744d5a04a0483fa6b19711b287a59c","440f9915f04e4972b963258808503a9c","918c9e6775bf4f319dbc4e68057f2ce0","b73a1998a2e74c17a2dcdff3bfcdff5c","48437e2d495c4646a31c7647820e8f77","e4db486e42914c94bf41f4f521e7085a","04e81cad97634c9a830d38da5a026e25","6ab23d487ce94e8cb628096346ae7f10","52d901fc9d0b4c21805aa432737c1009","bc43bc4504004b2285d7c087b92f6d6e","4534b7cb3f374ec594312a73a63cb2aa","e9f846e8823241f680c4f077cabbedac","548d1565762546889e4af50c2002619b","201a94eea14741e1a2c19072205df7f7"]},"executionInfo":{"status":"ok","timestamp":1696079866404,"user_tz":-60,"elapsed":233,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"cf6d9422-012d-4d92-a248-9e1feea240b2"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4119e481cf774e14a39e3480bf3dee23"}},"metadata":{}}],"source":["from huggingface_hub import notebook_login\n","notebook_login()"]},{"cell_type":"markdown","metadata":{"id":"GyWc1x3-o3xG"},"source":["If you don't want to use a 
Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login` (or `login`)"]},{"cell_type":"markdown","metadata":{"id":"Gc5AfUeFo3xH"},"source":["3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥 using `push_to_hub()` function\n","\n","- Let's create **the model dictionary that contains the hyperparameters and the Q_table**."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FiMqxqVHg0I4"},"outputs":[],"source":["model = {\n","    \"env_id\": env_id,\n","    \"max_steps\": max_steps,\n","    \"n_training_episodes\": n_training_episodes,\n","    \"n_eval_episodes\": n_eval_episodes,\n","    \"eval_seed\": eval_seed,\n","\n","    \"learning_rate\": learning_rate,\n","    \"gamma\": gamma,\n","\n","    \"max_epsilon\": max_epsilon,\n","    \"min_epsilon\": min_epsilon,\n","    \"decay_rate\": decay_rate,\n","\n","    \"qtable\": Qtable_frozenlake\n","}"]},{"cell_type":"markdown","metadata":{"id":"9kld-AEso3xH"},"source":["Let's fill the `push_to_hub` function:\n","\n","- `repo_id`: the name of the Hugging Face Hub Repository that will be created/updated `\n","(repo_id = {username}/{repo_name})`\n","💡 A good `repo_id` is `{username}/q-{env_id}`\n","- `model`: our model dictionary containing the hyperparameters and the Qtable.\n","- `env`: the environment.\n","- `video_fps` (optional): frames per second of the replay video, 1 by default"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5sBo2umnXpPd","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079872377,"user_tz":-60,"elapsed":225,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"30d8e1a4-42cc-45d4-eceb-b0f9c86e1af5"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'env_id': 'FrozenLake-v1',\n"," 'max_steps': 99,\n"," 'n_training_episodes': 10000,\n"," 'n_eval_episodes': 100,\n"," 'eval_seed': [],\n"," 'learning_rate': 0.7,\n"," 'gamma': 0.95,\n"," 'max_epsilon': 1.0,\n"," 
'min_epsilon': 0.05,\n"," 'decay_rate': 0.0002,\n"," 'qtable': array([[0.73509189, 0.77378094, 0.77378094, 0.73509189],\n","        [0.73509189, 0.        , 0.81450625, 0.77378094],\n","        [0.77378094, 0.857375  , 0.77378094, 0.81450625],\n","        [0.81450625, 0.        , 0.77378094, 0.77378094],\n","        [0.77378094, 0.81450625, 0.        , 0.73509189],\n","        [0.        , 0.        , 0.        , 0.        ],\n","        [0.        , 0.9025    , 0.        , 0.81450625],\n","        [0.        , 0.        , 0.        , 0.        ],\n","        [0.81450625, 0.        , 0.857375  , 0.77378094],\n","        [0.81450625, 0.9025    , 0.9025    , 0.        ],\n","        [0.857375  , 0.95      , 0.        , 0.857375  ],\n","        [0.        , 0.        , 0.        , 0.        ],\n","        [0.        , 0.        , 0.        , 0.        ],\n","        [0.        , 0.9025    , 0.95      , 0.857375  ],\n","        [0.9025    , 0.95      , 1.        , 0.9025    ],\n","        [0.        , 0.        , 0.        , 0.        
]])}"]},"metadata":{},"execution_count":23}],"source":["model"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"RpOTtSt83kPZ","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["6363a5b1c65749c6977f97c2b00c55ec","b54c8a87ab0d4850aaf8f9e6e5af5de3","196e8c478db34d4c949ada9697d87f2d","518fa64f8c3846e886468ac731edbfaa","b2b2baeb9d4c415bab801855351d4ce1","c376e2f1509947b0b0115160bbd121d2","2b2ed6fb86f344a69a2cbc5e0f4fd479","e3cc65bebe4a4d0cbd765bb826cae2df","c4abebed7d954ac9b12d7f02240a0895","b4503a457c044fd2aa5c58391427571c","c60c04f8a206433f9a75cb9f3568bf20","ae5a832a46d94ce68c4a4c9e2636a562","c0a9f2753fb24bd899dccc94e914a296","7098717195b54ad481f7bbdfd1f988d9","c59b3e51fc78441488eb0823f4482730","f5d23de601f643158c10b9a5df7914aa","d9f2b2d47c6b4249a89a38d7f9e6ff67","807481d4c14b43b7b03f75f4f546b9ff","ff70d642a053434eaf8500872709e56f","abcb725b1d74471c8d7bba61ad2de760","5cd1832337c14c3cac847142c0d2a18e","bb44cc6e87654874a0a1bec09b5a54f4","73702d5503374a0d9787365c45c5da4c","0134a25d0993417a8cac06177939e642","c503c61dac1c405991dd693ded8f0947","8e75b304812a4ca0b3c6e963a41f2560","416154146a6d41fbb8231b334c26e02e","f8aed36c53c84744a02b1f6578c56233","72b575b0d9b041b2b0465509d2c4158d","e22407fa6ac54bca94ffdfc76be62b5f","12120b5c42b0445fb9afcd651bdd075e","d125a8de2fdb4478b08a47ce232db088","f7c5020bc4ac43bba38e70cfe7379283","0ae97a99f86540079c48b842681c6460","e5356c548e84454585a645b66ffe84e0","728955de7f2940229f35febdf2a52fee","e6a280b5696b4c7e934d162cbb9f47b2","15eb4d2b8b124ddf96d07bf9ccdf6410","5546de085a3841f78067d8c4a694c6b3","d9326170d94d4a5db9a2262ef2299149","fd1d7bf7b736412bb6b45ae5446bb5dc","9a5506c45ccd451695ec028e35a85560","0d9bed76678d42e9b9e707edda2fb334","dbb8148d047847de9a0b5c72004b821a"]},"executionInfo":{"status":"ok","timestamp":1696079877673,"user_tz":-60,"elapsed":3088,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}},"outputId":"0a8a8e84-1ec5-4a45-b67f-fe3ed7aea47b"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6363a5b1c65749c6977f97c2b00c55ec"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ae5a832a46d94ce68c4a4c9e2636a562"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"73702d5503374a0d9787365c45c5da4c"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["q-learning.pkl:   0%|          | 0.00/914 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0ae97a99f86540079c48b842681c6460"}},"metadata":{}}],"source":["username = \"jake-walker\" # FILL THIS\n","repo_name = \"q-FrozenLake-v1-4x4-noSlippery\"\n","push_to_hub(\n","    repo_id=f\"{username}/{repo_name}\",\n","    model=model,\n","    env=env)"]},{"cell_type":"markdown","metadata":{"id":"E2875IGsprzq"},"source":["Congrats 🥳 you've just implemented from scratch, trained, and uploaded your first Reinforcement Learning agent.\n","FrozenLake-v1 no_slippery is very simple environment, let's try a harder one 🔥."]},{"cell_type":"markdown","metadata":{"id":"18lN8Bz7yvLt"},"source":["# Part 2: Taxi-v3 🚖\n","\n","## Create and understand [Taxi-v3 🚕](https://gymnasium.farama.org/environments/toy_text/taxi/)\n","---\n","\n","💡 A good habit when you start to use an environment is to check its documentation\n","\n","👉 https://gymnasium.farama.org/environments/toy_text/taxi/\n","\n","---\n","\n","In `Taxi-v3` 🚕, there are four designated locations in 
the grid world indicated by R(ed), G(reen), Y(ellow), and B(lue).\n","\n","When the episode starts, **the taxi starts off at a random square** and the passenger is at a random location. The taxi drives to the passenger’s location, **picks up the passenger**, drives to the passenger’s destination (another one of the four specified locations), and then **drops off the passenger**. Once the passenger is dropped off, the episode ends.\n","\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/taxi.png\" alt=\"Taxi\">\n"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"gL0wpeO8gpej","executionInfo":{"status":"ok","timestamp":1696092196201,"user_tz":-60,"elapsed":279,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["env = gym.make(\"Taxi-v3\", render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"gBOaXgtsrmtT"},"source":["There are **500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger** (including the case when the passenger is in the taxi), and **4 destination locations.**\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"_TPNaGSZrgqA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092200923,"user_tz":-60,"elapsed":277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0660ae9e-2983-4dd5-80c9-ef3b6dee80cd"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are  500  possible states\n"]}],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"CdeeZuokrhit","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092203051,"user_tz":-60,"elapsed":336,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}},"outputId":"5c91177f-6b05-4dab-a02a-3be9cdcc719a"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are  6  possible actions\n"]}],"source":["action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"markdown","metadata":{"id":"R1r50Advrh5Q"},"source":["The action space (the set of possible actions the agent can take) is discrete with **6 actions available 🎮**:\n","\n","- 0: move south\n","- 1: move north\n","- 2: move east\n","- 3: move west\n","- 4: pickup passenger\n","- 5: drop off passenger\n","\n","Reward function 💰:\n","\n","- -1 per step unless other reward is triggered.\n","- +20 delivering passenger.\n","- -10 executing “pickup” and “drop-off” actions illegally."]},{"cell_type":"code","execution_count":15,"metadata":{"id":"US3yDXnEtY9I","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092315666,"user_tz":-60,"elapsed":7,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"7064388e-0185-4a00-f4df-dde50786fcfb"},"outputs":[{"output_type":"stream","name":"stdout","text":["[[0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," ...\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 
0.]]\n","Q-table shape:  (500, 6)\n"]}],"source":["# Create our Q table with state_size rows and action_size columns (500x6)\n","Qtable_taxi = initialize_q_table(state_space, action_space)\n","print(Qtable_taxi)\n","print(\"Q-table shape: \", Qtable_taxi .shape)"]},{"cell_type":"markdown","metadata":{"id":"gUMKPH0_LJyH"},"source":["## Define the hyperparameters ⚙️\n","\n","⚠ DO NOT MODIFY EVAL_SEED: the eval_seed array **allows us to evaluate your agent with the same taxi starting positions for every classmate**"]},{"cell_type":"code","execution_count":52,"metadata":{"id":"AB6n__hhg7YS","executionInfo":{"status":"ok","timestamp":1696093093649,"user_tz":-60,"elapsed":295,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Training parameters\n","n_training_episodes = 2500000  # Total training episodes\n","learning_rate = 0.7            # Learning rate\n","\n","# Evaluation parameters\n","n_eval_episodes = 100        # Total number of test episodes\n","\n","# DO NOT MODIFY EVAL_SEED\n","eval_seed = [16,54,165,177,191,191,120,80,149,178,48,38,6,125,174,73,50,172,100,148,146,6,25,40,68,148,49,167,9,97,164,176,61,7,54,55,\n"," 161,131,184,51,170,12,120,113,95,126,51,98,36,135,54,82,45,95,89,59,95,124,9,113,58,85,51,134,121,169,105,21,30,11,50,65,12,43,82,145,152,97,106,55,31,85,38,\n"," 112,102,168,123,97,21,83,158,26,80,63,5,81,32,11,28,148] # Evaluation seed, this ensures that all classmates agents are trained on the same taxi starting position\n","                                                          # Each seed has a specific starting state\n","\n","# Environment parameters\n","env_id = \"Taxi-v3\"           # Name of the environment\n","max_steps = 99               # Max steps per episode\n","gamma = 0.95                 # Discounting rate\n","\n","# Exploration parameters\n","max_epsilon = 1.0             # Exploration probability at start\n","min_epsilon = 0.05           # Minimum exploration 
probability\n","decay_rate = 0.005            # Exponential decay rate for exploration prob\n"]},{"cell_type":"markdown","metadata":{"id":"1TMORo1VLTsX"},"source":["## Train our Q-Learning agent 🏃"]},{"cell_type":"code","execution_count":53,"metadata":{"id":"WwP3Y2z2eS-K","colab":{"base_uri":"https://localhost:8080/","height":281,"referenced_widgets":["f1aba0ec8e784c42b112a74472267f0f","f258e37b90e64527b3fb4cc87c110bfb","e4d2875db2a441d1b1f9a63b43b3cdcd","d84cd9996c784094af24f27013b5c970","405c66fe1e5f4ebda88959e965fa0c17","f4eb5fcc8fb64019a0a4a3984d338473","0230faf9f7314e90aceeeb14735f80fd","aa4fcdf1ebc249a58f810fb36006bdb8","c1052fddbce24e67811b2e3c84801460","830018b1d3324418a7e8b90fcc4535c1","e27e7697b17543c4a14e2a9d35f32f6e"]},"executionInfo":{"status":"ok","timestamp":1696094912922,"user_tz":-60,"elapsed":1817873,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"69a04b7d-349d-4427-854d-bda536baaf66"},"outputs":[{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/2500000 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f1aba0ec8e784c42b112a74472267f0f"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,\n","         0.        ],\n","       [ 2.75200369,  3.94947757,  2.75200369,  3.94947757,  5.20997639,\n","        -5.05052243],\n","       [ 7.93349184,  9.40367562,  7.93349184,  9.40367562, 10.9512375 ,\n","         0.40367562],\n","       ...,\n","       [10.9512375 , 12.58025   , 10.9512375 ,  9.40367562,  1.9512375 ,\n","         1.9512375 ],\n","       [ 5.20997639,  6.53681725,  5.20997639,  6.53681725, -3.79002361,\n","        -3.79002361],\n","       [16.1       , 14.295     , 16.1       , 18.        
,  7.1       ,\n","         7.1       ]])"]},"metadata":{},"execution_count":53}],"source":["Qtable_taxi = train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable_taxi, learning_rate)\n","Qtable_taxi"]},{"cell_type":"code","source":["mean_reward, std_reward = evaluate_agent(env, max_steps, n_eval_episodes, Qtable_taxi, eval_seed)\n","print(f\"Reward: {mean_reward} +/- {std_reward}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":67,"referenced_widgets":["9d44bf64d8714ac285554a47ade9d4db","09d306353c6249d282c7093d30dd2b2c","de2a248a1f2b4c6f94d9ae1ba7accb82","54393e0489ec442a9e36942546d281b2","849dfdd2321c4bbfa90e8aa8e21c38a7","ef2d2e9fabc6494ba0648b669d6a91d2","4457995c30a04396872f866184bd8151","0e566cd2a6ed45bf8c0feabef523ee59","72a3cf500fd34d9bade3055360bc3d8e","b6aacba6d11d49cabac66c42049319bc","614bc7497c84455197017396a30e5501"]},"id":"3V1cHjJO_DZA","executionInfo":{"status":"ok","timestamp":1696094912923,"user_tz":-60,"elapsed":7,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ac934856-19b7-4a66-935a-f48b25a9f1e2"},"execution_count":54,"outputs":[{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9d44bf64d8714ac285554a47ade9d4db"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Reward: 7.56 +/- 2.706732347314747\n"]}]},{"cell_type":"markdown","metadata":{"id":"wPdu0SueLVl2"},"source":["## Create a model dictionary 💾 and publish our trained model to the Hub 🔥\n","\n","- We create a model dictionary that will contain all the training hyperparameters for reproducibility and the Q-Table.\n"]},{"cell_type":"code","execution_count":55,"metadata":{"id":"0a1FpE_3hNYr","executionInfo":{"status":"ok","timestamp":1696094918860,"user_tz":-60,"elapsed":224,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["model = {\n","    \"env_id\": env_id,\n","    \"max_steps\": max_steps,\n","    \"n_training_episodes\": n_training_episodes,\n","    \"n_eval_episodes\": n_eval_episodes,\n","    \"eval_seed\": eval_seed,\n","\n","    \"learning_rate\": learning_rate,\n","    \"gamma\": gamma,\n","\n","    \"max_epsilon\": max_epsilon,\n","    \"min_epsilon\": min_epsilon,\n","    \"decay_rate\": decay_rate,\n","\n","    \"qtable\": Qtable_taxi\n","}"]},{"cell_type":"code","execution_count":56,"metadata":{"id":"dhQtiQozhOn1","colab":{"base_uri":"https://localhost:8080/","height":251,"referenced_widgets":["e697bc9541244dcaa063bce1596f27a4","4dda7398d8594ccc9056276f7086abb3","fa7f5b5dea4b4f77b8b28f646b54df4d","713789465a484c75858710654d5f34d9","ba9ec78d22d14e4a8c1d9c8a0c35f696","e3c765f08dd84843a56884e7c347f70a","0bc6d042f62e406992a811fe915c99af","4ac8c0103af949bb9f6bd1d7cdf34031","5d38797e81ad4a6dab19d67e0a24e0cf","d02d6c4b5f754706b83563becedbc163","f6756d58b1854092a11dc0648bb62ec3","2455e834526a41e798e7368d26b1e47a","0bf9e3901a1d43b9ae5ec1d7c91dec3e","ecc1104e2cd94b77b04e3a05ed024a8d","213cc41ca4434c1fab2a80b9fe20dbc4","b75c940c972a48298593633c7647771c","4d855e8be73b44699a632b7b1c913824","625bba259547431e906659738a263c54","ca610db4b8c04bfeb043050345454fab","eeb4422d56d14b70a3b6b3253f851003","e37dcb6119d04d11adc2efd051b43ecd","309b664e615e43298fe1d15f733f6ba1","a5f463ba70bb477f9f407e9cd0f66bb2","f113595cdc3f4968800334139d28cde8","f3ef64aa553840feab4b0a1fcc288d86","f3d8b3510494430484097dd72b9fa95d","dcb89e796a5c43f7b5899ea12243235d","eab72c7103c14e8d9027dd2651bdbd3c","b992492d93b5486ebcfd33126a56400d","a9e83f7bef3843b999f120332f791c85","002591ac452b4a4683a798a95b5bcc4f","f8fac9672368448db9ed4c7d04f067af","bfb9222136364ef793db561b60b8dcf5","2e1ca4c549d249d5b5043ef9c1c3f919","9af46f486d1942388114bd9d795c1211","8923229253dd4919937143b0cab1138e","517c081d6608445bbc0b645865c85680","9f4d341ac2b04623a80cb87de091c8e7","5efb90
d5297f4e06b1425d3bd9cef568","eff5618b6647405996bb4a46d8c0b767","fbb4ab140ce24c69a9f19f3e10bae77d","d2c972960c194d88bbb7b3018331aec3","c2983191da0e4b6f9f8857455200aa83","f5228016014a4413af718462fa11b3f3","fe714ee300f141fd9f7a517f1c289cef","84961d5178fe419db1e61d8e128e73a9","fdcdc72928f343128f27143d8e01eeab","7100dbb5532a4e268b510e81a828dabc","1615b15e28b34dbb955e26d30502025d","dfbb1f86a79c4f97bc59dc276ed9c79c","7f20fc7b659c40cdbfc4919f22d1be7e","115600bc5a1b41a1899ce6c955f2feff","d5872793d51541289fbfb88601525d22","97321a1444e14dcda5ca10bfd6ae8fb1","023d6586ca674d3d9557b965c4c270cc"]},"executionInfo":{"status":"ok","timestamp":1696094925125,"user_tz":-60,"elapsed":3204,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"187a0ad7-522f-4dba-e6d4-b7d43f44dc45"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e697bc9541244dcaa063bce1596f27a4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Downloading (…)94191/.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2455e834526a41e798e7368d26b1e47a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a5f463ba70bb477f9f407e9cd0f66bb2"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2e1ca4c549d249d5b5043ef9c1c3f919"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["False\n"]},{"output_type":"stream","name":"stderr","text":["WARNING:imageio_ffmpeg:IMAGEIO FFMPEG_WRITER WARNING: input image is not 
divisible by macro_block_size=16, resizing from (550, 350) to (560, 352) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n"]},{"output_type":"display_data","data":{"text/plain":["q-learning.pkl:   0%|          | 0.00/24.6k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fe714ee300f141fd9f7a517f1c289cef"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Your model is pushed to the Hub. You can view your model here:  https://huggingface.co/jake-walker/q-Taxi-v3\n"]}],"source":["username = \"jake-walker\" # FILL THIS\n","repo_name = \"q-Taxi-v3\" # FILL THIS\n","push_to_hub(\n","    repo_id=f\"{username}/{repo_name}\",\n","    model=model,\n","    env=env)"]},{"cell_type":"markdown","metadata":{"id":"ZgSdjgbIpRti"},"source":["Now that it's on the Hub, you can compare the results of your Taxi-v3 with your classmates using the leaderboard 🏆 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n","\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/taxi-leaderboard.png\" alt=\"Taxi Leaderboard\">"]},{"cell_type":"markdown","metadata":{"id":"bzgIO70c0bu2"},"source":["# Part 3: Load from Hub 🔽\n","\n","What's amazing with Hugging Face Hub 🤗 is that you can easily load powerful models from the community.\n","\n","Loading a saved model from the Hub is really easy:\n","\n","1. You go to https://huggingface.co/models?other=q-learning to see the list of all the q-learning saved models.\n","2. 
You select one and copy its repo_id\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit2/copy-id.png\" alt=\"Copy id\">"]},{"cell_type":"markdown","metadata":{"id":"gTth6thRoC6X"},"source":["3. Then we just need to use `load_from_hub` with:\n","- The repo_id\n","- The filename: the saved model inside the repo."]},{"cell_type":"markdown","metadata":{"id":"EtrfoTaBoNrd"},"source":["#### Do not modify this code"]},{"cell_type":"code","execution_count":57,"metadata":{"id":"Eo8qEzNtCaVI","executionInfo":{"status":"ok","timestamp":1696095062038,"user_tz":-60,"elapsed":234,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["from urllib.error import HTTPError\n","\n","from huggingface_hub import hf_hub_download\n","\n","\n","def load_from_hub(repo_id: str, filename: str) -> str:\n","    \"\"\"\n","    Download a model from Hugging Face Hub.\n","    :param repo_id: id of the model repository from the Hugging Face Hub\n","    :param filename: name of the model zip file from the repository\n","    \"\"\"\n","    # Get the model from the Hub, download and cache the model on your local disk\n","    pickle_model = hf_hub_download(\n","        repo_id=repo_id,\n","        filename=filename\n","    )\n","\n","    with open(pickle_model, 'rb') as f:\n","      downloaded_model_file = pickle.load(f)\n","\n","    return downloaded_model_file"]},{"cell_type":"markdown","metadata":{"id":"b_sM2gNioPZH"},"source":["### 
."]},{"cell_type":"code","execution_count":59,"metadata":{"id":"JUm9lz2gCQcU","colab":{"base_uri":"https://localhost:8080/","height":351,"referenced_widgets":["db4eb369c18749fab4b992626b0d7ee2","eee8dc2819414ec8b385cdaf9ca31679","d8300151209b4bf69dd40293c4829c68","78ac941b16744bba8546042f2e1be9fa","a96ade3a25cb4fab9696e00f1d56249b","8679d7521cdf41c6bc090635f507e9d3","2545f2ad186d45f7a779f423959ebfa5","1ff751ff2be5462a825a3a85a991c9c2","033b057020714f4b8e8d93fb450fb5e2","b8e75d3f9be8427a82c61a6480bc7f78","348c3154cb1344fb9a6db7f5be8d3e72","f818282ac4ef46fe8f073e997e2364e5","4407635f19394c4cab829aa77324d44f","cf18d12d46a04fd4a99fe80819ba7a7b","60c019c2adf745c8aa616a7269f03ac5","3a86d2cd279049b38ba189824cafeae6","cd38950d068f46e38cb085847661515d","615e0eed539c4b2c88449b9cf9a698d8","7e97a0cc37b9427d820a184b88ffe64e","34ab33b6a93147c6a776f2c8e41198c2","3f6d00d90d384d768a7712cff9adf03f","a3136bd489b94a91a248eb08faddf549"]},"executionInfo":{"status":"ok","timestamp":1696095092963,"user_tz":-60,"elapsed":671,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"006cc6a5-403c-4345-bd60-7cd5898987d6"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (…)/main/q-learning.pkl:   0%|          | 0.00/24.6k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"db4eb369c18749fab4b992626b0d7ee2"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["{'env_id': 'Taxi-v3', 'max_steps': 99, 'n_training_episodes': 10000, 'n_eval_episodes': 100, 'eval_seed': [54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54], 
'learning_rate': 0.05, 'gamma': 0.95, 'epsilon': 1.0, 'max_epsilon': 1.0, 'min_epsilon': 0.5, 'decay_rate': 0.0001, 'qtable': array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,\n","         0.        ],\n","       [ 2.75168986,  3.94808144,  2.74835115,  3.94708535,  5.20997639,\n","        -5.05152611],\n","       [ 7.933411  ,  9.40364156,  7.93341878,  9.40365555, 10.9512375 ,\n","         0.40364221],\n","       ...,\n","       [10.84469076, 12.58021117, 10.85413417,  9.37024088,  1.83772561,\n","         1.77035246],\n","       [ 5.14518348,  6.53679411,  5.14878828,  6.4627824 , -3.82724811,\n","        -3.83785862],\n","       [16.09862582, 14.28988384, 16.09821048, 18.        ,  7.09786796,\n","         7.09268876]])}\n"]},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f818282ac4ef46fe8f073e997e2364e5"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["(12.0, 0.0)"]},"metadata":{},"execution_count":59}],"source":["model = load_from_hub(repo_id=\"tjscollins/q-Taxi-v3\", filename=\"q-learning.pkl\") # Try to use another model\n","\n","print(model)\n","env = gym.make(model[\"env_id\"])\n","\n","evaluate_agent(env, model[\"max_steps\"], model[\"n_eval_episodes\"], model[\"qtable\"], 
model[\"eval_seed\"])"]},{"cell_type":"code","execution_count":60,"metadata":{"id":"O7pL8rg1MulN","colab":{"base_uri":"https://localhost:8080/","height":99,"referenced_widgets":["981ab71773204dccb4e8a323dd97efc0","e8ccf2360f654f92941de06d5b98cb3f","e16a5839a4f4449abe265f3f2cfa6261","429e7232f611491385805a4dd1c92e7a","e5e43e14975a4b2685da1fada90ce101","dc5b85f0ca2a41b6914ccda37b6e0438","b987cbd3a33a4110b5c9d7b3ce333b45","c7dbd12a1e3a4691a956c7e7476d4912","af27db0b6e5546028c4518df44ef6449","4b4697fc763a48c592f8f494f37b558d","4e7e917d38f64b1e8ef08c224c0ddee7","5876625ed27c45bf9e15631657f13565","8099cffb2b204cfb92cddd328965e152","804fbde59727408590095db9c5bdad35","b97d7581248344998347aa3bc62b4f4d","ff9c6ed16e2545aabdcecfeb620f2d5d","dbe820d70e8f480babd7b15e80c3356b","fc0477020d8b4448aa6ddbb35acdb942","1b9185b2dd6447b8b5324029374ea8c8","74f997c338c844b28d6eb5279634d779","047d00fb6d06450d9e72081111324ac8","02936f5cd8ea48c8af1eb0cee49ddcd3"]},"executionInfo":{"status":"ok","timestamp":1696095117203,"user_tz":-60,"elapsed":779,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"f0196b7b-532b-4f6a-cc94-736ce6db2acb"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading q-learning.pkl:   0%|          | 0.00/933 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"981ab71773204dccb4e8a323dd97efc0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["  0%|          | 0/100 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5876625ed27c45bf9e15631657f13565"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["(1.0, 0.0)"]},"metadata":{},"execution_count":60}],"source":["model = load_from_hub(repo_id=\"ThomasSimonini/q-FrozenLake-v1-no-slippery\", filename=\"q-learning.pkl\") # Try to use another model\n","\n","env = gym.make(model[\"env_id\"], 
is_slippery=False)\n","\n","evaluate_agent(env, model[\"max_steps\"], model[\"n_eval_episodes\"], model[\"qtable\"], model[\"eval_seed\"])"]},{"cell_type":"markdown","metadata":{"id":"BQAwLnYFPk-s"},"source":["## Some additional challenges 🏆\n","\n","The best way to learn **is to try things on your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. With 1,000,000 steps, we saw some great results!\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n","\n","Here are some ideas to climb up the leaderboard:\n","\n","* Train more steps\n","* Try different hyperparameters by looking at what your classmates have done.\n","* **Push your new trained model** on the Hub 🔥\n","\n","Are walking on ice and driving taxis too boring for you? Try to **change the environment**, why not use FrozenLake-v1 slippery version? Check how they work [using the gymnasium documentation](https://gymnasium.farama.org/) and have fun 🎉."]},{"cell_type":"markdown","metadata":{"id":"p-fW-EU5WejJ"},"source":["_____________________________________________________________________\n","Congrats 🥳, you've just implemented, trained, and uploaded your first Reinforcement Learning agent.\n","\n","Understanding Q-Learning is an **important step to understanding value-based methods.**\n","\n","In the next Unit with Deep Q-Learning, we'll see that creating and updating a Q-table was a good strategy — **however, it is not scalable.**\n","\n","For instance, imagine you create an agent that learns to play Doom.\n","\n","<img src=\"https://vizdoom.cs.put.edu.pl/user/pages/01.tutorial/basic.png\" alt=\"Doom\"/>\n","\n","Doom is a large environment with a huge state space (millions of different states). 
Creating and updating a Q-table for that environment would not be efficient.\n","\n","That's why we'll study Deep Q-Learning in the next unit, an algorithm **where we use a neural network that approximates, given a state, the different Q-values for each action.**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit4/atari-envs.gif\" alt=\"Environments\"/>\n"]},{"cell_type":"markdown","metadata":{"id":"BjLhT70TEZIn"},"source":["See you in Unit 3! 🔥\n","\n","## Keep learning, stay awesome 🤗"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/master/notebooks/unit2/unit2.ipynb","timestamp":1696078055017}],"collapsed_sections":["Ji_UrI5l2zzn","67OdoKL63eDD","B2_-8b8z5k54","8R5ej1fS4P2V","Pnpk2ePoem3r"]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"e6f0cdea90a54a409d1b6bb5da149fa2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1c2b811eaf5c46bdbae02a2c61a51ffd","IPY_MODEL_840f4495c39f4dfca4ae71602ccd41e9","IPY_MODEL_3de082bd7cbd40b9be4aec0e684becba"],"layout":"IPY_MODEL_9833920632dd4f988b6f18e8caccbc5d"}},"1c2b811eaf5c46bdbae02a2c61a51ffd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout"
:"IPY_MODEL_828041fe06b74a6a9ea122ff94a4e2a4","placeholder":"​","style":"IPY_MODEL_c0a0ecce9da741c6884684ac9224e388","value":"100%"}},"840f4495c39f4dfca4ae71602ccd41e9":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_41af6d560ff648b58032316a7367e001","max":10000,"min":0,"orientation":"horizontal","style":"IPY_MODEL_8ac7eea521864e47bc77533e33a5990e","value":10000}},"3de082bd7cbd40b9be4aec0e684becba":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_98ac2fcbdebd41e8a0465564c7f9f73c","placeholder":"​","style":"IPY_MODEL_fb6a4a28f32c417f88ae9b485051f39e","value":" 10000/10000 [00:03&lt;00:00, 
3824.86it/s]"}},"9833920632dd4f988b6f18e8caccbc5d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"828041fe06b74a6a9ea122ff94a4e2a4":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0a0ecce9da741c6884684ac9224e388":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"41af6d560ff648b58032316a7367e001":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8ac7eea521864e47bc77533e33a5990e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"98ac2fcbdebd41e8a0465564c7f9f73c":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fb6a4a28f32c417f88ae9b485051f39e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9288c30c30e14f8e964256563722f0f1":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_fdd5af398f844d6199f7be41e14b90d9","IPY_MODEL_45712c72b8024eda976dfe6791af84d7","IPY_MODEL_e0effc981c2d4baeb1a6cf6224b22fc4"],"layout":"IPY_MODEL_04878d4e8ebf4d5d86fef0edc22b03
e7"}},"fdd5af398f844d6199f7be41e14b90d9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6bb9b7084bf74043914f4f4b6d35469f","placeholder":"​","style":"IPY_MODEL_e521204e0833433b825b3036d14d02ee","value":"100%"}},"45712c72b8024eda976dfe6791af84d7":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_810a3f5035d648779b5550f82f64264c","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0a231d4af7cb4fe29f2be2bce9a0eb8c","value":100}},"e0effc981c2d4baeb1a6cf6224b22fc4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d70c52144c824b6ab1133576ed52c635","placeholder":"​","style":"IPY_MODEL_21ea34cbd4724403b10cff3a41461e06","value":" 100/100 [00:00&lt;00:00, 
56.15it/s]"}},"04878d4e8ebf4d5d86fef0edc22b03e7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6bb9b7084bf74043914f4f4b6d35469f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,
"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e521204e0833433b825b3036d14d02ee":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"810a3f5035d648779b5550f82f64264c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0a231d4af7cb4fe29f2be2bce9a0eb8c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d70c52144c824b6ab1133576ed52c635":{"model_
module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"21ea34cbd4724403b10cff3a41461e06":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4119e481cf774e14a39e3480bf3dee23":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_918c9e6775bf4f319dbc4e68057f2ce0","IPY_MODEL_b73a1998a2e74c17a2dcdff3bfcdff5c","IPY_MODEL_48437e2d495c4646a31c7647820e8f77","IPY_MODEL_e4db486e42914c94bf41f4f521e7085a"],"layout
":"IPY_MODEL_747b9c6d02e54bef8cc875a06ee0caae"}},"c55f984de9d0425792199bb0204b2a88":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_166beba22ad74a7fbf21d101156b9f28","placeholder":"​","style":"IPY_MODEL_d9ad51d7a6524f9884c85b6ae14d7212","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"}},"5ae9e2d6dc10461f8d1f99b5ed460946":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_8591dbbbc0fe4a20ab143914dd849b4c","placeholder":"​","style":"IPY_MODEL_bde8da2b29d64d5cbc158a34697acb7b","value":""}},"cd98d11981f34cdcb8209cbaaf74f61e":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxV
iew","description":"Add token as git credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_cfa99abd6ac34994bd48b5919375c96b","style":"IPY_MODEL_ce25d48725e84931b5a1ad3f0c5cd057","value":true}},"e9244f63c5f5486898935cd4ba84fed8":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_ebc0a564f44c4428ae315638657f15de","style":"IPY_MODEL_79760d8954834265a6c7f637a5f5f57b","tooltip":""}},"9496a0198bc64e3ea7180059fe35cb58":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ee7230b508824636aa1de3aa9e547537","placeholder":"​","style":"IPY_MODEL_4dbdec8615eb43728988b1e92868f7bf","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"747b9c6d02e54bef8cc875a06ee0caae":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"166beba22ad74a7fbf21d101156b9f28":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9ad51d7a6524f9884c85b6ae14d7212":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8591dbbbc0fe4a20ab143914dd849b4c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bde8da2b29d64d5cbc158a34697acb7b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cfa99abd6ac34994bd48b5919375c96b":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce25d48725e84931b5a1ad3f0c5cd057":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ebc0a564f44c4428ae315638657f15de":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"79760d8954834265a6c7f637a5f5f57b":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"ee7230b508824636aa1de3aa9e547537":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4dbdec8615eb43728988b1e92868f7bf":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6fc271597c73420385a1011461269bb8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_75744d5a04a0483fa6b19711b287a59c","placeholder":"​","style":"IPY_MODEL_440f9915f04e4972b963258808503a9c","value":"Connecting..."}},"75744d5a04a0483fa6b19711b287a59c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"440f9915f04e4972b963258808503a9c":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"918c9e6775bf4f319dbc4e68057f2ce0":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_04e81cad97634c9a830d38da5a026e25","placeholder":"​","style":"IPY_MODEL_6ab23d487ce94e8cb628096346ae7f10","value":"Token is valid (permission: write)."}},"b73a1998a2e74c17a2dcdff3bfcdff5c":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_52d901fc9d0b4c21805aa432737c1009","placeholder":"​","style":"IPY_MODEL_bc43bc4504004b2285d7c087b92f6d6e","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"48437e2d495c4646a31c7647820e8f77":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4534b7cb3f374ec594312a73a63cb2aa","placeholder":"​","style":"IPY_MODEL_e9f846e8823241f680c4f077cabbedac","value":"Your token has been saved to /root/.cache/huggingface/token"}},"e4db486e42914c94bf41f4f521e7085a":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_548d1565762546889e4af50c2002619b","placeholder":"​","style":"IPY_MODEL_201a94eea14741e1a2c19072205df7f7","value":"Login 
successful"}},"04e81cad97634c9a830d38da5a026e25":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6ab23d487ce94e8cb628096346ae7f10":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"52d901fc9d0b4c21805aa432737c1009":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc43bc4504004b2285d7c087b92f6d6e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4534b7cb3f374ec594312a73a63cb2aa":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e9f846e8823241f680c4f077cabbedac":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"548d1565762546889e4af50c2002619b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"201a94eea14741e1a2c19072205df7f7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6363a5b1c65749c6977f97c2b00c55ec":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b54c8a87ab0d4850aaf8f9e6e5af5de3","IPY_MODEL_196e8c478db34d4c949ada9697d87f2d","IPY_MODEL_518fa64f8c3846e886468ac731edbfaa"],"layout":"IPY_MODEL_b2b2baeb9d4c415bab801855351d4ce1"}},"b54c8a87ab0d4850aaf8f9e6e5af5de3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c376e2f1509947b0b0115160bbd121d2","placeholder":"​","style":"IPY_MODEL_2b2ed6fb86f344a69a2cbc5e0f4fd479","value":"Fetching 1 files: 100%"}},"196e8c478db34d4c949ada9697d87f2d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e3cc65bebe4a4d0cbd765bb826cae2df","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c4abebed7d954ac9b12d7f02240a0895","value":1}},"518fa64f8c3846e886468ac731edbfaa":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","
description":"","description_tooltip":null,"layout":"IPY_MODEL_b4503a457c044fd2aa5c58391427571c","placeholder":"​","style":"IPY_MODEL_c60c04f8a206433f9a75cb9f3568bf20","value":" 1/1 [00:00&lt;00:00,  3.83it/s]"}},"b2b2baeb9d4c415bab801855351d4ce1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c376e2f1509947b0b0115160bbd121d2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justi
fy_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2b2ed6fb86f344a69a2cbc5e0f4fd479":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e3cc65bebe4a4d0cbd765bb826cae2df":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c4abebed7d954ac9b12d7f02240a0895":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel",
"_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b4503a457c044fd2aa5c58391427571c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c60c04f8a206433f9a75cb9f3568bf20":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ae5a832a46d94ce68c4a4c9e2636a562":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","
children":["IPY_MODEL_c0a9f2753fb24bd899dccc94e914a296","IPY_MODEL_7098717195b54ad481f7bbdfd1f988d9","IPY_MODEL_c59b3e51fc78441488eb0823f4482730"],"layout":"IPY_MODEL_f5d23de601f643158c10b9a5df7914aa"}},"c0a9f2753fb24bd899dccc94e914a296":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d9f2b2d47c6b4249a89a38d7f9e6ff67","placeholder":"​","style":"IPY_MODEL_807481d4c14b43b7b03f75f4f546b9ff","value":"100%"}},"7098717195b54ad481f7bbdfd1f988d9":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff70d642a053434eaf8500872709e56f","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_abcb725b1d74471c8d7bba61ad2de760","value":100}},"c59b3e51fc78441488eb0823f4482730":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5cd1832337c14c3cac847142c0d2a18e","placeholder":"​","style":"IPY_MODEL_bb44cc6e87654874a0a1bec09b5a54f4","value":" 100/100 [00:00&lt;00:00, 
2229.58it/s]"}},"f5d23de601f643158c10b9a5df7914aa":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9f2b2d47c6b4249a89a38d7f9e6ff67":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"807481d4c14b43b7b03f75f4f546b9ff":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ff70d642a053434eaf8500872709e56f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abcb725b1d74471c8d7bba61ad2de760":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"5cd1832337c14c3cac847142c0d2a18e":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bb44cc6e87654874a0a1bec09b5a54f4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"73702d5503374a0d9787365c45c5da4c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0134a25d0993417a8cac06177939e642","IPY_MODEL_c503c61dac1c405991dd693ded8f0947","IPY_MODEL_8e75b304812a4ca0b3c6e963a41f2560"],"layout":"IPY_MODEL_416154146a6d41fbb8231b334c26e0
2e"}},"0134a25d0993417a8cac06177939e642":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8aed36c53c84744a02b1f6578c56233","placeholder":"​","style":"IPY_MODEL_72b575b0d9b041b2b0465509d2c4158d","value":"100%"}},"c503c61dac1c405991dd693ded8f0947":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e22407fa6ac54bca94ffdfc76be62b5f","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_12120b5c42b0445fb9afcd651bdd075e","value":100}},"8e75b304812a4ca0b3c6e963a41f2560":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d125a8de2fdb4478b08a47ce232db088","placeholder":"​","style":"IPY_MODEL_f7c5020bc4ac43bba38e70cfe7379283","value":" 100/100 [00:00&lt;00:00, 
2111.43it/s]"}},"416154146a6d41fbb8231b334c26e02e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f8aed36c53c84744a02b1f6578c56233":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"72b575b0d9b041b2b0465509d2c4158d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e22407fa6ac54bca94ffdfc76be62b5f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"12120b5c42b0445fb9afcd651bdd075e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d125a8de2fdb4478b08a47ce232db088":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f7c5020bc4ac43bba38e70cfe7379283":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0ae97a99f86540079c48b842681c6460":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e5356c548e84454585a645b66ffe84e0","IPY_MODEL_728955de7f2940229f35febdf2a52fee","IPY_MODEL_e6a280b5696b4c7e934d162cbb9f47b2"],"layout":"IPY_MODEL_15eb4d2b8b124ddf96d07bf9ccdf64
10"}},"e5356c548e84454585a645b66ffe84e0":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5546de085a3841f78067d8c4a694c6b3","placeholder":"​","style":"IPY_MODEL_d9326170d94d4a5db9a2262ef2299149","value":"q-learning.pkl: 100%"}},"728955de7f2940229f35febdf2a52fee":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fd1d7bf7b736412bb6b45ae5446bb5dc","max":914,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9a5506c45ccd451695ec028e35a85560","value":914}},"e6a280b5696b4c7e934d162cbb9f47b2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0d9bed76678d42e9b9e707edda2fb334","placeholder":"​","style":"IPY_MODEL_dbb8148d047847de9a0b5c72004b821a","value":" 914/914 [00:00&lt;00:00, 
2.75kB/s]"}},"15eb4d2b8b124ddf96d07bf9ccdf6410":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5546de085a3841f78067d8c4a694c6b3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9326170d94d4a5db9a2262ef2299149":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fd1d7bf7b736412bb6b45ae5446bb5dc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9a5506c45ccd451695ec028e35a85560":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"0d9bed76678d42e9b9e707edda2fb334":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dbb8148d047847de9a0b5c72004b821a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f1aba0ec8e784c42b112a74472267f0f":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f258e37b90e64527b3fb4cc87c110bfb","IPY_MODEL_e4d2875db2a441d1b1f9a63b43b3cdcd","IPY_MODEL_d84cd9996c784094af24f27013b5c970"],"layout":"IPY_MODEL_405c66fe1e5f4ebda88959e965fa0c17"
}},"f258e37b90e64527b3fb4cc87c110bfb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f4eb5fcc8fb64019a0a4a3984d338473","placeholder":"​","style":"IPY_MODEL_0230faf9f7314e90aceeeb14735f80fd","value":"100%"}},"e4d2875db2a441d1b1f9a63b43b3cdcd":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_aa4fcdf1ebc249a58f810fb36006bdb8","max":2500000,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c1052fddbce24e67811b2e3c84801460","value":2500000}},"d84cd9996c784094af24f27013b5c970":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_830018b1d3324418a7e8b90fcc4535c1","placeholder":"​","style":"IPY_MODEL_e27e7697b17543c4a14e2a9d35f32f6e","value":" 2500000/2500000 [30:17&lt;00:00, 
1220.58it/s]"}},"405c66fe1e5f4ebda88959e965fa0c17":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f4eb5fcc8fb64019a0a4a3984d338473":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0230faf9f7314e90aceeeb14735f80fd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"aa4fcdf1ebc249a58f810fb36006bdb8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c1052fddbce24e67811b2e3c84801460":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"830018b1d3324418a7e8b90fcc4535c1":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e27e7697b17543c4a14e2a9d35f32f6e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9d44bf64d8714ac285554a47ade9d4db":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_09d306353c6249d282c7093d30dd2b2c","IPY_MODEL_de2a248a1f2b4c6f94d9ae1ba7accb82","IPY_MODEL_54393e0489ec442a9e36942546d281b2"],"layout":"IPY_MODEL_849dfdd2321c4bbfa90e8aa8e21c38
a7"}},"09d306353c6249d282c7093d30dd2b2c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ef2d2e9fabc6494ba0648b669d6a91d2","placeholder":"​","style":"IPY_MODEL_4457995c30a04396872f866184bd8151","value":"100%"}},"de2a248a1f2b4c6f94d9ae1ba7accb82":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0e566cd2a6ed45bf8c0feabef523ee59","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_72a3cf500fd34d9bade3055360bc3d8e","value":100}},"54393e0489ec442a9e36942546d281b2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b6aacba6d11d49cabac66c42049319bc","placeholder":"​","style":"IPY_MODEL_614bc7497c84455197017396a30e5501","value":" 100/100 [00:00&lt;00:00, 
963.02it/s]"}},"849dfdd2321c4bbfa90e8aa8e21c38a7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ef2d2e9fabc6494ba0648b669d6a91d2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4457995c30a04396872f866184bd8151":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0e566cd2a6ed45bf8c0feabef523ee59":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"72a3cf500fd34d9bade3055360bc3d8e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b6aacba6d11d49cabac66c42049319bc":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"614bc7497c84455197017396a30e5501":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e697bc9541244dcaa063bce1596f27a4":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4dda7398d8594ccc9056276f7086abb3","IPY_MODEL_fa7f5b5dea4b4f77b8b28f646b54df4d","IPY_MODEL_713789465a484c75858710654d5f34d9"],"layout":"IPY_MODEL_ba9ec78d22d14e4a8c1d9c8a0c35f69
6"}},"4dda7398d8594ccc9056276f7086abb3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e3c765f08dd84843a56884e7c347f70a","placeholder":"​","style":"IPY_MODEL_0bc6d042f62e406992a811fe915c99af","value":"Fetching 1 files: 100%"}},"fa7f5b5dea4b4f77b8b28f646b54df4d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4ac8c0103af949bb9f6bd1d7cdf34031","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5d38797e81ad4a6dab19d67e0a24e0cf","value":1}},"713789465a484c75858710654d5f34d9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d02d6c4b5f754706b83563becedbc163","placeholder":"​","style":"IPY_MODEL_f6756d58b1854092a11dc0648bb62ec3","value":" 1/1 [00:00&lt;00:00,  
1.64it/s]"}},"ba9ec78d22d14e4a8c1d9c8a0c35f696":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e3c765f08dd84843a56884e7c347f70a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0bc6d042f62e406992a811fe915c99af":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4ac8c0103af949bb9f6bd1d7cdf34031":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5d38797e81ad4a6dab19d67e0a24e0cf":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d02d6c4b5f754706b83563becedbc163":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f6756d58b1854092a11dc0648bb62ec3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2455e834526a41e798e7368d26b1e47a":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0bf9e3901a1d43b9ae5ec1d7c91dec3e","IPY_MODEL_ecc1104e2cd94b77b04e3a05ed024a8d","IPY_MODEL_213cc41ca4434c1fab2a80b9fe20dbc4"],"layout":"IPY_MODEL_b75c940c972a48298593633c7647771c"
}},"0bf9e3901a1d43b9ae5ec1d7c91dec3e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4d855e8be73b44699a632b7b1c913824","placeholder":"​","style":"IPY_MODEL_625bba259547431e906659738a263c54","value":"Downloading (…)94191/.gitattributes: 100%"}},"ecc1104e2cd94b77b04e3a05ed024a8d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ca610db4b8c04bfeb043050345454fab","max":1519,"min":0,"orientation":"horizontal","style":"IPY_MODEL_eeb4422d56d14b70a3b6b3253f851003","value":1519}},"213cc41ca4434c1fab2a80b9fe20dbc4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e37dcb6119d04d11adc2efd051b43ecd","placeholder":"​","style":"IPY_MODEL_309b664e615e43298fe1d15f733f6ba1","value":" 1.52k/1.52k [00:00&lt;00:00, 
45.7kB/s]"}},"b75c940c972a48298593633c7647771c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d855e8be73b44699a632b7b1c913824":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"625bba259547431e906659738a263c54":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ca610db4b8c04bfeb043050345454fab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eeb4422d56d14b70a3b6b3253f851003":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e37dcb6119d04d11adc2efd051b43ecd":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"309b664e615e43298fe1d15f733f6ba1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a5f463ba70bb477f9f407e9cd0f66bb2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f113595cdc3f4968800334139d28cde8","IPY_MODEL_f3ef64aa553840feab4b0a1fcc288d86","IPY_MODEL_f3d8b3510494430484097dd72b9fa95d"],"layout":"IPY_MODEL_dcb89e796a5c43f7b5899ea12243235d"
}},"f113595cdc3f4968800334139d28cde8":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_eab72c7103c14e8d9027dd2651bdbd3c","placeholder":"​","style":"IPY_MODEL_b992492d93b5486ebcfd33126a56400d","value":"100%"}},"f3ef64aa553840feab4b0a1fcc288d86":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a9e83f7bef3843b999f120332f791c85","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_002591ac452b4a4683a798a95b5bcc4f","value":100}},"f3d8b3510494430484097dd72b9fa95d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8fac9672368448db9ed4c7d04f067af","placeholder":"​","style":"IPY_MODEL_bfb9222136364ef793db561b60b8dcf5","value":" 100/100 [00:00&lt;00:00, 
858.08it/s]"}},"dcb89e796a5c43f7b5899ea12243235d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eab72c7103c14e8d9027dd2651bdbd3c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b992492d93b5486ebcfd33126a56400d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a9e83f7bef3843b999f120332f791c85":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"002591ac452b4a4683a798a95b5bcc4f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f8fac9672368448db9ed4c7d04f067af":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bfb9222136364ef793db561b60b8dcf5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e1ca4c549d249d5b5043ef9c1c3f919":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9af46f486d1942388114bd9d795c1211","IPY_MODEL_8923229253dd4919937143b0cab1138e","IPY_MODEL_517c081d6608445bbc0b645865c85680"],"layout":"IPY_MODEL_9f4d341ac2b04623a80cb87de091c8e
7"}},"9af46f486d1942388114bd9d795c1211":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5efb90d5297f4e06b1425d3bd9cef568","placeholder":"​","style":"IPY_MODEL_eff5618b6647405996bb4a46d8c0b767","value":"100%"}},"8923229253dd4919937143b0cab1138e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fbb4ab140ce24c69a9f19f3e10bae77d","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d2c972960c194d88bbb7b3018331aec3","value":100}},"517c081d6608445bbc0b645865c85680":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c2983191da0e4b6f9f8857455200aa83","placeholder":"​","style":"IPY_MODEL_f5228016014a4413af718462fa11b3f3","value":" 100/100 [00:00&lt;00:00, 
1353.43it/s]"}},"9f4d341ac2b04623a80cb87de091c8e7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5efb90d5297f4e06b1425d3bd9cef568":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eff5618b6647405996bb4a46d8c0b767":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fbb4ab140ce24c69a9f19f3e10bae77d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2c972960c194d88bbb7b3018331aec3":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2983191da0e4b6f9f8857455200aa83":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f5228016014a4413af718462fa11b3f3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fe714ee300f141fd9f7a517f1c289cef":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_84961d5178fe419db1e61d8e128e73a9","IPY_MODEL_fdcdc72928f343128f27143d8e01eeab","IPY_MODEL_7100dbb5532a4e268b510e81a828dabc"],"layout":"IPY_MODEL_1615b15e28b34dbb955e26d3050202
5d"}},"84961d5178fe419db1e61d8e128e73a9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dfbb1f86a79c4f97bc59dc276ed9c79c","placeholder":"​","style":"IPY_MODEL_7f20fc7b659c40cdbfc4919f22d1be7e","value":"q-learning.pkl: 100%"}},"fdcdc72928f343128f27143d8e01eeab":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_115600bc5a1b41a1899ce6c955f2feff","max":24572,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d5872793d51541289fbfb88601525d22","value":24572}},"7100dbb5532a4e268b510e81a828dabc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97321a1444e14dcda5ca10bfd6ae8fb1","placeholder":"​","style":"IPY_MODEL_023d6586ca674d3d9557b965c4c270cc","value":" 24.6k/24.6k [00:00&lt;00:00, 
50.3kB/s]"}},"1615b15e28b34dbb955e26d30502025d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dfbb1f86a79c4f97bc59dc276ed9c79c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7f20fc7b659c40cdbfc4919f22d1be7e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"115600bc5a1b41a1899ce6c955f2feff":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d5872793d51541289fbfb88601525d22":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"97321a1444e14dcda5ca10bfd6ae8fb1":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"023d6586ca674d3d9557b965c4c270cc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"db4eb369c18749fab4b992626b0d7ee2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_eee8dc2819414ec8b385cdaf9ca31679","IPY_MODEL_d8300151209b4bf69dd40293c4829c68","IPY_MODEL_78ac941b16744bba8546042f2e1be9fa"],"layout":"IPY_MODEL_a96ade3a25cb4fab9696e00f1d56249b"
}},"eee8dc2819414ec8b385cdaf9ca31679":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8679d7521cdf41c6bc090635f507e9d3","placeholder":"​","style":"IPY_MODEL_2545f2ad186d45f7a779f423959ebfa5","value":"Downloading (…)/main/q-learning.pkl: 100%"}},"d8300151209b4bf69dd40293c4829c68":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1ff751ff2be5462a825a3a85a991c9c2","max":24589,"min":0,"orientation":"horizontal","style":"IPY_MODEL_033b057020714f4b8e8d93fb450fb5e2","value":24589}},"78ac941b16744bba8546042f2e1be9fa":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b8e75d3f9be8427a82c61a6480bc7f78","placeholder":"​","style":"IPY_MODEL_348c3154cb1344fb9a6db7f5be8d3e72","value":" 24.6k/24.6k [00:00&lt;00:00, 
1.18MB/s]"}},"a96ade3a25cb4fab9696e00f1d56249b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8679d7521cdf41c6bc090635f507e9d3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2545f2ad186d45f7a779f423959ebfa5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1ff751ff2be5462a825a3a85a991c9c2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"033b057020714f4b8e8d93fb450fb5e2":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b8e75d3f9be8427a82c61a6480bc7f78":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"348c3154cb1344fb9a6db7f5be8d3e72":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f818282ac4ef46fe8f073e997e2364e5":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4407635f19394c4cab829aa77324d44f","IPY_MODEL_cf18d12d46a04fd4a99fe80819ba7a7b","IPY_MODEL_60c019c2adf745c8aa616a7269f03ac5"],"layout":"IPY_MODEL_3a86d2cd279049b38ba189824cafeae6"
}},"4407635f19394c4cab829aa77324d44f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cd38950d068f46e38cb085847661515d","placeholder":"​","style":"IPY_MODEL_615e0eed539c4b2c88449b9cf9a698d8","value":"100%"}},"cf18d12d46a04fd4a99fe80819ba7a7b":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7e97a0cc37b9427d820a184b88ffe64e","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_34ab33b6a93147c6a776f2c8e41198c2","value":100}},"60c019c2adf745c8aa616a7269f03ac5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3f6d00d90d384d768a7712cff9adf03f","placeholder":"​","style":"IPY_MODEL_a3136bd489b94a91a248eb08faddf549","value":" 100/100 [00:00&lt;00:00, 
1102.31it/s]"}},"3a86d2cd279049b38ba189824cafeae6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd38950d068f46e38cb085847661515d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"615e0eed539c4b2c88449b9cf9a698d8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7e97a0cc37b9427d820a184b88ffe64e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"34ab33b6a93147c6a776f2c8e41198c2":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3f6d00d90d384d768a7712cff9adf03f":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a3136bd489b94a91a248eb08faddf549":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"981ab71773204dccb4e8a323dd97efc0":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e8ccf2360f654f92941de06d5b98cb3f","IPY_MODEL_e16a5839a4f4449abe265f3f2cfa6261","IPY_MODEL_429e7232f611491385805a4dd1c92e7a"],"layout":"IPY_MODEL_e5e43e14975a4b2685da1fada90ce1
01"}},"e8ccf2360f654f92941de06d5b98cb3f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dc5b85f0ca2a41b6914ccda37b6e0438","placeholder":"​","style":"IPY_MODEL_b987cbd3a33a4110b5c9d7b3ce333b45","value":"Downloading q-learning.pkl: 100%"}},"e16a5839a4f4449abe265f3f2cfa6261":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c7dbd12a1e3a4691a956c7e7476d4912","max":933,"min":0,"orientation":"horizontal","style":"IPY_MODEL_af27db0b6e5546028c4518df44ef6449","value":933}},"429e7232f611491385805a4dd1c92e7a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4b4697fc763a48c592f8f494f37b558d","placeholder":"​","style":"IPY_MODEL_4e7e917d38f64b1e8ef08c224c0ddee7","value":" 933/933 [00:00&lt;00:00, 
50.0kB/s]"}},"e5e43e14975a4b2685da1fada90ce101":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dc5b85f0ca2a41b6914ccda37b6e0438":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b987cbd3a33a4110b5c9d7b3ce333b45":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c7dbd12a1e3a4691a956c7e7476d4912":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af27db0b6e5546028c4518df44ef6449":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4b4697fc763a48c592f8f494f37b558d":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4e7e917d38f64b1e8ef08c224c0ddee7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5876625ed27c45bf9e15631657f13565":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8099cffb2b204cfb92cddd328965e152","IPY_MODEL_804fbde59727408590095db9c5bdad35","IPY_MODEL_b97d7581248344998347aa3bc62b4f4d"],"layout":"IPY_MODEL_ff9c6ed16e2545aabdcecfeb620f2d5d"
}},"8099cffb2b204cfb92cddd328965e152":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dbe820d70e8f480babd7b15e80c3356b","placeholder":"​","style":"IPY_MODEL_fc0477020d8b4448aa6ddbb35acdb942","value":"100%"}},"804fbde59727408590095db9c5bdad35":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1b9185b2dd6447b8b5324029374ea8c8","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_74f997c338c844b28d6eb5279634d779","value":100}},"b97d7581248344998347aa3bc62b4f4d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_047d00fb6d06450d9e72081111324ac8","placeholder":"​","style":"IPY_MODEL_02936f5cd8ea48c8af1eb0cee49ddcd3","value":" 100/100 [00:00&lt;00:00, 
2042.39it/s]"}},"ff9c6ed16e2545aabdcecfeb620f2d5d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dbe820d70e8f480babd7b15e80c3356b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc0477020d8b4448aa6ddbb35acdb942":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1b9185b2dd6447b8b5324029374ea8c8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"74f997c338c844b28d6eb5279634d779":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"047d00fb6d06450d9e72081111324ac8":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"02936f5cd8ea48c8af1eb0cee49ddcd3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
diff --git a/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb b/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb
new file mode 100644
index 0000000..39a6308
--- /dev/null
+++ b/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb	
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"k7xBVPzoXxOg"},"source":["# Unit 3: Deep Q-Learning with Atari Games 👾 using RL Baselines3 Zoo\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit4/thumbnail.jpg\" alt=\"Unit 3 Thumbnail\">\n","\n","In this notebook, **you'll train a Deep Q-Learning agent** playing Space Invaders using [RL Baselines3 Zoo](https://github.com/DLR-RM/rl-baselines3-zoo), a training framework based on [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/) that provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.\n","\n","We're using the [RL-Baselines-3 Zoo integration, a vanilla version of Deep Q-Learning](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) with no extensions such as Double-DQN, Dueling-DQN, and Prioritized Experience Replay.\n","\n","⬇️ Here is an example of what **you will achieve** ⬇️"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J9S713biXntc","colab":{"base_uri":"https://localhost:8080/","height":231},"executionInfo":{"status":"ok","timestamp":1696584508361,"user_tz":-60,"elapsed":7,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"16b8d973-4da5-4222-efb5-ad0f924a53ef"},"outputs":[{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["<video controls autoplay><source src=\"https://huggingface.co/ThomasSimonini/ppo-SpaceInvadersNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>\n"]},"metadata":{}}],"source":["%%html\n","<video controls autoplay><source src=\"https://huggingface.co/ThomasSimonini/ppo-SpaceInvadersNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>"]},{"cell_type":"markdown","source":["### 🎮 Environments:\n","\n","- 
[SpaceInvadersNoFrameskip-v4](https://gymnasium.farama.org/environments/atari/space_invaders/)\n","\n","You can see the difference between Space Invaders versions here 👉 https://gymnasium.farama.org/environments/atari/space_invaders/#variants\n","\n","### 📚 RL-Library:\n","\n","- [RL-Baselines3-Zoo](https://github.com/DLR-RM/rl-baselines3-zoo)"],"metadata":{"id":"ykJiGevCMVc5"}},{"cell_type":"markdown","metadata":{"id":"wciHGjrFYz9m"},"source":["## Objectives of this notebook 🏆\n","At the end of the notebook, you will:\n","- Be able to understand more deeply **how RL Baselines3 Zoo works**.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score 🔥.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from the Deep Reinforcement Learning Course\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"],"metadata":{"id":"TsnP0rjxMn1e"}},{"cell_type":"markdown","metadata":{"id":"nw6fJHIAZd-J"},"source":["In this free course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more! Check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up for the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our Discord server to chat with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"0vgANIBBZg1p"},"source":["## Prerequisites 🏗️\n","Before diving into 
the notebook, you need to:\n","\n","🔲 📚 **[Study Deep Q-Learning by reading Unit 3](https://huggingface.co/deep-rl-course/unit3/introduction)**  🤗"]},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"7kszpGFaRVhq"}},{"cell_type":"markdown","metadata":{"id":"QR0jZtYreSI5"},"source":["# Let's train a Deep Q-Learning agent playing Atari's Space Invaders 👾 and upload it to the Hub.\n","\n","We strongly recommend students **to use Google Colab for the hands-on exercises instead of running them on their personal computers**.\n","\n","By using Google Colab, **you can focus on learning and experimenting without worrying about the technical aspects of setting up your environments**.\n","\n","To validate this hands-on for the certification process, you need to push your trained model to the Hub and **get a result of >= 200**.\n","\n","To find your result, go to the leaderboard and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"]},{"cell_type":"markdown","source":["## A piece of advice 💡\n","It's better to run this colab in a copy on your Google Drive, so that **if it times out** you still have the saved notebook on your Google Drive and do not need to redo everything from scratch.\n","\n","To do that you can either do `Ctrl + S` or `File > Save a copy in Google Drive.`\n","\n","Also, we're going to **train it for 90 minutes with 1M timesteps**. Typing `!nvidia-smi` will tell you what GPU you're using.\n","\n","And if you want to train for more steps, such as 10 million, this will take about 9 hours, potentially resulting in Colab timing out. 
In that case, I recommend running this on your local computer (or somewhere else). Just click on: `File>Download`."],"metadata":{"id":"Nc8BnyVEc3Ys"}},{"cell_type":"code","source":["!nvidia-smi"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RA9lvEMQUdKi","executionInfo":{"status":"ok","timestamp":1696584561299,"user_tz":-60,"elapsed":1143,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0bc66424-a1b1-40bf-c3a5-8e2d70917136"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Fri Oct  6 09:29:19 2023       \n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |\n","|-------------------------------+----------------------+----------------------+\n","| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n","| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n","|                               |                      |               MIG M. 
|\n","|===============================+======================+======================|\n","|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n","| N/A   51C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |\n","|                               |                      |                  N/A |\n","+-------------------------------+----------------------+----------------------+\n","                                                                               \n","+-----------------------------------------------------------------------------+\n","| Processes:                                                                  |\n","|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n","|        ID   ID                                                   Usage      |\n","|=============================================================================|\n","|  No running processes found                                                 |\n","+-----------------------------------------------------------------------------+\n"]}]},{"cell_type":"markdown","source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**. 
To do that, go to `Runtime > Change Runtime type`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"],"metadata":{"id":"PU4FVzaoM6fC"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"],"metadata":{"id":"KV0NyFdQM9ZG"}},{"cell_type":"markdown","source":["# Install RL-Baselines3 Zoo and its dependencies 📚\n","\n","If you see `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.`, **this is normal and it's not a critical error**: there's a version conflict, but the packages we need are installed."],"metadata":{"id":"wS_cVefO-aYg"}},{"cell_type":"code","source":["# For now we install this update of RL-Baselines3 Zoo\n","!pip install git+https://github.com/DLR-RM/rl-baselines3-zoo@update/hf"],"metadata":{"id":"hLTwHqIWdnPb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584719529,"user_tz":-60,"elapsed":143277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"939d6eff-14d2-4d66-d51a-b33b10af71d4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting git+https://github.com/DLR-RM/rl-baselines3-zoo@update/hf\n","  Cloning https://github.com/DLR-RM/rl-baselines3-zoo (to revision update/hf) to /tmp/pip-req-build-p3lz3yfc\n","  Running command git clone --filter=blob:none --quiet https://github.com/DLR-RM/rl-baselines3-zoo /tmp/pip-req-build-p3lz3yfc\n","  Running command git checkout -b update/hf --track origin/update/hf\n","  Switched to a new branch 'update/hf'\n","  Branch 'update/hf' set up to track remote branch 'update/hf' from 'origin'.\n","  Resolved https://github.com/DLR-RM/rl-baselines3-zoo to commit 
7dcbff7e74e7a12c052452181ff353a4dbed313a\n","  Running command git submodule update --init --recursive -q\n","  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n","  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n","  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","Collecting sb3-contrib>=2.0.0a9 (from rl-zoo3==2.0.0a9)\n","  Downloading sb3_contrib-2.1.0-py3-none-any.whl (80 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.3/80.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gym==0.26.2 (from rl-zoo3==2.0.0a9)\n","  Downloading gym-0.26.2.tar.gz (721 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m721.7/721.7 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n","  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n","  Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting huggingface-sb3>=2.2.1 (from rl-zoo3==2.0.0a9)\n","  Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (4.66.1)\n","Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (13.6.0)\n","Collecting optuna (from rl-zoo3==2.0.0a9)\n","  Downloading optuna-3.3.0-py3-none-any.whl (404 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m404.2/404.2 kB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (6.0.1)\n","Collecting pytablewriter~=0.64 (from rl-zoo3==2.0.0a9)\n","  Downloading pytablewriter-0.64.2-py3-none-any.whl (106 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.6/106.6 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (2.2.1)\n","Requirement already satisfied: gym-notices>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (0.0.8)\n","Collecting huggingface-hub~=0.8 (from huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9)\n","  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: wasabi in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) 
(1.1.2)\n","Requirement already satisfied: setuptools>=38.3.0 in /usr/local/lib/python3.10/dist-packages (from pytablewriter~=0.64->rl-zoo3==2.0.0a9) (67.7.2)\n","Collecting DataProperty<2,>=0.55.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading DataProperty-1.0.1-py3-none-any.whl (27 kB)\n","Collecting mbstrdecoder<2,>=1.0.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading mbstrdecoder-1.1.3-py3-none-any.whl (7.8 kB)\n","Collecting pathvalidate<3,>=2.3.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading pathvalidate-2.5.2-py3-none-any.whl (20 kB)\n","Collecting tabledata<2,>=1.3.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading tabledata-1.3.3-py3-none-any.whl (11 kB)\n","Collecting tcolorpy<1,>=0.0.5 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading tcolorpy-0.1.4-py3-none-any.whl (7.9 kB)\n","Collecting typepy[datetime]<2,>=1.2.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n","  Downloading typepy-1.3.2-py3-none-any.whl (31 kB)\n","Collecting stable-baselines3>=2.1.0 (from sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n","  Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m178.7/178.7 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting alembic>=1.5.0 (from optuna->rl-zoo3==2.0.0a9)\n","  Downloading alembic-1.12.0-py3-none-any.whl (226 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.0/226.0 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting cmaes>=0.10.0 (from optuna->rl-zoo3==2.0.0a9)\n","  Downloading cmaes-0.10.0-py3-none-any.whl (29 kB)\n","Collecting colorlog (from optuna->rl-zoo3==2.0.0a9)\n","  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from 
optuna->rl-zoo3==2.0.0a9) (23.2)\n","Requirement already satisfied: sqlalchemy>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from optuna->rl-zoo3==2.0.0a9) (2.0.21)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->rl-zoo3==2.0.0a9) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->rl-zoo3==2.0.0a9) (2.16.1)\n","Collecting Mako (from alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9)\n","  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.7/78.7 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9) (4.5.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.12.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2.31.0)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->rl-zoo3==2.0.0a9) (0.1.2)\n","Requirement already satisfied: chardet<6,>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from mbstrdecoder<2,>=1.0.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (5.2.0)\n","Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.3.0->optuna->rl-zoo3==2.0.0a9) (3.0.0)\n","Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n","  
Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m49.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (2.0.1+cu118)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.5.3)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.7.1)\n","Requirement already satisfied: python-dateutil<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (2.8.2)\n","Requirement already satisfied: pytz>=2018.9 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (2023.3.post1)\n","Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n","  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.8.0->typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (1.16.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from 
torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1.2)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.27.6)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (17.0.2)\n","Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9) (2.1.3)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.1.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (0.12.0)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (4.43.0)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.4.5)\n","Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (9.4.0)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1.1)\n","Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2023.7.22)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.3.0)\n","Building wheels for collected packages: rl-zoo3, gym\n","  Building wheel for rl-zoo3 (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for rl-zoo3: filename=rl_zoo3-2.0.0a9-py3-none-any.whl size=76401 sha256=e9df8ae954f8408f3ca58502acb807a37aad1aff44f6723c00d77a0e02868346\n","  Stored in directory: /tmp/pip-ephem-wheel-cache-uqr6j5hl/wheels/fc/36/d5/2ef574649d85327de098075c8523da50be2612f3e5807261f7\n","  Building wheel for gym (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827621 sha256=7d6a0506ad85e4f801ff76aaa17b47c3404f04d8fb7966e4c9c78c73b7e9cf13\n","  Stored in directory: /root/.cache/pip/wheels/b9/22/6d/3e7b32d98451b4cd9d12417052affbeeeea012955d437da1da\n","Successfully built rl-zoo3 gym\n","Installing collected packages: farama-notifications, tcolorpy, pathvalidate, mbstrdecoder, Mako, gymnasium, gym, colorlog, cmaes, typepy, huggingface-hub, alembic, optuna, huggingface-sb3, DataProperty, tabledata, pytablewriter, stable-baselines3, sb3-contrib, rl-zoo3\n","  Attempting uninstall: gym\n","    Found existing installation: gym 0.25.2\n","    Uninstalling gym-0.25.2:\n","      Successfully uninstalled gym-0.25.2\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n","dopamine-rl 4.0.6 requires gym<=0.25.2, but you have gym 0.26.2 which is incompatible.\u001b[0m\u001b[31m\n","\u001b[0mSuccessfully installed DataProperty-1.0.1 Mako-1.2.4 alembic-1.12.0 cmaes-0.10.0 colorlog-6.7.0 farama-notifications-0.0.4 gym-0.26.2 gymnasium-0.29.1 huggingface-hub-0.17.3 huggingface-sb3-3.0 mbstrdecoder-1.1.3 optuna-3.3.0 pathvalidate-2.5.2 pytablewriter-0.64.2 rl-zoo3-2.0.0a9 sb3-contrib-2.1.0 stable-baselines3-2.1.0 tabledata-1.3.3 tcolorpy-0.1.4 typepy-1.3.2\n"]}]},{"cell_type":"markdown","source":["IF AND ONLY IF THE VERSION ABOVE DOES NOT EXIST ANYMORE. 
UNCOMMENT AND INSTALL THE ONE BELOW"],"metadata":{"id":"p0xe2sJHdtHy"}},{"cell_type":"code","source":["#!pip install rl_zoo3==2.0.0a9"],"metadata":{"id":"N0d6wy-F-f39"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!apt-get install swig cmake ffmpeg"],"metadata":{"id":"8_MllY6Om1eI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584785636,"user_tz":-60,"elapsed":6952,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"3405c67f-4e42-4f75-bd6a-cd4b86450068"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Reading package lists... Done\n","Building dependency tree... Done\n","Reading state information... Done\n","cmake is already the newest version (3.22.1-1ubuntu1.22.04.1).\n","ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n","The following additional packages will be installed:\n","  swig4.0\n","Suggested packages:\n","  swig-doc swig-examples swig4.0-examples swig4.0-doc\n","The following NEW packages will be installed:\n","  swig swig4.0\n","0 upgraded, 2 newly installed, 0 to remove and 18 not upgraded.\n","Need to get 1,116 kB of archives.\n","After this operation, 5,542 kB of additional disk space will be used.\n","Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]\n","Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]\n","Fetched 1,116 kB in 2s (697 kB/s)\n","Selecting previously unselected package swig4.0.\n","(Reading database ... 
120879 files and directories currently installed.)\n","Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...\n","Unpacking swig4.0 (4.0.2-1ubuntu1) ...\n","Selecting previously unselected package swig.\n","Preparing to unpack .../swig_4.0.2-1ubuntu1_all.deb ...\n","Unpacking swig (4.0.2-1ubuntu1) ...\n","Setting up swig4.0 (4.0.2-1ubuntu1) ...\n","Setting up swig (4.0.2-1ubuntu1) ...\n","Processing triggers for man-db (2.10.2-1) ...\n"]}]},{"cell_type":"markdown","metadata":{"id":"4S9mJiKg6SqC"},"source":["To be able to use Atari games in Gymnasium, we need to install the `atari` package, as well as `accept-rom-license` to download the ROM files (game files)."]},{"cell_type":"code","source":["!pip install gymnasium[atari]\n","!pip install gymnasium[accept-rom-license]"],"metadata":{"id":"NsRP-lX1_2fC","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584805712,"user_tz":-60,"elapsed":20086,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"2a748c7e-dde5-49d6-bc7d-03ad11cf56d1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: gymnasium[atari] in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (0.0.4)\n","Collecting shimmy[atari]<1.0,>=0.1.0 (from gymnasium[atari])\n","  Downloading Shimmy-0.2.1-py3-none-any.whl (25 kB)\n","Collecting ale-py~=0.8.1 (from shimmy[atari]<1.0,>=0.1.0->gymnasium[atari])\n","  Downloading 
ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]<1.0,>=0.1.0->gymnasium[atari]) (6.1.0)\n","Installing collected packages: ale-py, shimmy\n","Successfully installed ale-py-0.8.1 shimmy-0.2.1\n","Requirement already satisfied: gymnasium[accept-rom-license] in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (0.0.4)\n","Collecting autorom[accept-rom-license]~=0.4.2 (from gymnasium[accept-rom-license])\n","  Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (8.1.7)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2.31.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (4.66.1)\n","Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license])\n","  
Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.7/434.7 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n","  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n","  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2023.7.22)\n","Building wheels for collected packages: AutoROM.accept-rom-license\n","  Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.6.1-py3-none-any.whl size=446660 sha256=fd1a9e89131c49b793e331349960b5f3eefb0d31da169f9c487807a8dcb1a296\n","  Stored in directory: /root/.cache/pip/wheels/6b/1b/ef/a43ff1a2f1736d5711faa1ba4c1f61be1131b8899e6a057811\n","Successfully built AutoROM.accept-rom-license\n","Installing collected packages: AutoROM.accept-rom-license, autorom\n","Successfully installed AutoROM.accept-rom-license-0.6.1 autorom-0.4.2\n"]}]},{"cell_type":"markdown","source":["## Create a virtual display 🔽\n","\n","During the notebook, we'll need to generate a replay video. To do so, with Colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen 🖥"],"metadata":{"id":"bTpYcVZVMzUI"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"jV6wjQ7Be7p5"},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"BE5JWP5rQIKf","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584826034,"user_tz":-60,"elapsed":1592,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"62c5a796-5942-4eab-a115-a45548801126"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<pyvirtualdisplay.display.Display at 0x7b2705a386d0>"]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"5iPgzluo9z-u"},"source":["## Train our Deep Q-Learning Agent to Play Space Invaders 👾\n","\n","To train an agent with RL-Baselines3-Zoo, we just need 
to do two things:\n","\n","1. Create a hyperparameter config file called `dqn.yml` that will contain our training hyperparameters.\n","\n","This is a template example:\n","\n","```\n","SpaceInvadersNoFrameskip-v4:\n","  env_wrapper:\n","    - stable_baselines3.common.atari_wrappers.AtariWrapper\n","  frame_stack: 4\n","  policy: 'CnnPolicy'\n","  n_timesteps: !!float 1e7\n","  buffer_size: 100000\n","  learning_rate: !!float 1e-4\n","  batch_size: 32\n","  learning_starts: 100000\n","  target_update_interval: 1000\n","  train_freq: 4\n","  gradient_steps: 1\n","  exploration_fraction: 0.1\n","  exploration_final_eps: 0.01\n","  # If True, you need to deactivate handle_timeout_termination\n","  # in the replay_buffer_kwargs\n","  optimize_memory_usage: False\n","```"]},{"cell_type":"markdown","metadata":{"id":"_VjblFSVDQOj"},"source":["Here we see that:\n","- We use the `Atari Wrapper` that preprocesses the input (frame reduction, grayscale, stack 4 frames)\n","- We use `CnnPolicy`, since we use Convolutional layers to process the frames\n","- We train it for 10 million `n_timesteps`\n","- Memory (Experience Replay) size is 100000, i.e. the number of experience steps saved to train your agent on again.\n","\n","💡 My advice is to **reduce the training timesteps to 1M,** which will take about 90 minutes on a P100. `!nvidia-smi` will tell you what GPU you're using. At 10 million steps, this will take about 9 hours, which could likely result in Colab timing out. I recommend running this on your local computer (or somewhere else). 
Just click on: `File>Download`."]},{"cell_type":"markdown","metadata":{"id":"5qTkbWrkECOJ"},"source":["In terms of hyperparameter optimization, my advice is to focus on these 3 hyperparameters:\n","- `learning_rate`\n","- `buffer_size (Experience Memory size)`\n","- `batch_size`\n","\n","As a good practice, you need to **check the documentation to understand what each hyperparameter does**: https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html#parameters\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Hn8bRTHvERRL"},"source":["2. We start the training and save the models in the `logs` folder 📁\n","\n","- Define the algorithm after `--algo`, where we save the model after `-f` and where the hyperparameter config is after `-c`."]},{"cell_type":"code","execution_count":9,"metadata":{"id":"Xr1TVW4xfbz3","colab":{"base_uri":"https://localhost:8080/"},"outputId":"fba1f572-48ab-4870-d486-c99cdf4ec09a","executionInfo":{"status":"ok","timestamp":1696589371144,"user_tz":-60,"elapsed":2041815,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n","|    n_updates        | 101638   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.9e+03  |\n","|    ep_rew_mean      | 353      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2560     |\n","|    fps              | 254      |\n","|    time_elapsed     | 1994     |\n","|    total_timesteps  | 507359   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0151   |\n","|    n_updates        | 101839   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.91e+03 
|\n","|    ep_rew_mean      | 356      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2564     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2000     |\n","|    total_timesteps  | 508616   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0174   |\n","|    n_updates        | 102153   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.9e+03  |\n","|    ep_rew_mean      | 352      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2568     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2004     |\n","|    total_timesteps  | 509632   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00915  |\n","|    n_updates        | 102407   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.91e+03 |\n","|    ep_rew_mean      | 355      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2572     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2008     |\n","|    total_timesteps  | 510699   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0112   |\n","|    n_updates        | 102674   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.91e+03 |\n","|    ep_rew_mean      | 355      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2576     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2014     |\n","|    
total_timesteps  | 511805   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0338   |\n","|    n_updates        | 102951   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.92e+03 |\n","|    ep_rew_mean      | 358      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2580     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2017     |\n","|    total_timesteps  | 512491   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0318   |\n","|    n_updates        | 103122   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.93e+03 |\n","|    ep_rew_mean      | 356      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2584     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2022     |\n","|    total_timesteps  | 513724   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0167   |\n","|    n_updates        | 103430   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.94e+03 |\n","|    ep_rew_mean      | 358      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2588     |\n","|    fps              | 254      |\n","|    time_elapsed     | 2025     |\n","|    total_timesteps  | 514581   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.029    |\n","|    n_updates        | 103645   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.95e+03 |\n","|    ep_rew_mean      | 362      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2592     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2030     |\n","|    total_timesteps  | 515544   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0355   |\n","|    n_updates        | 103885   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.95e+03 |\n","|    ep_rew_mean      | 361      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2596     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2032     |\n","|    total_timesteps  | 516320   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0251   |\n","|    n_updates        | 104079   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.96e+03 |\n","|    ep_rew_mean      | 363      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2600     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2038     |\n","|    total_timesteps  | 517705   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0274   |\n","|    n_updates        | 104426   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.96e+03 |\n","|    ep_rew_mean      | 362      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 2604     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2043     |\n","|    total_timesteps  | 518964   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0375   |\n","|    n_updates        | 104740   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.97e+03 |\n","|    ep_rew_mean      | 363      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2608     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2048     |\n","|    total_timesteps  | 520042   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0122   |\n","|    n_updates        | 105010   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.96e+03 |\n","|    ep_rew_mean      | 362      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2612     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2051     |\n","|    total_timesteps  | 520838   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0251   |\n","|    n_updates        | 105209   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.96e+03 |\n","|    ep_rew_mean      | 363      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2616     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2056     |\n","|    total_timesteps  | 522069   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0218   |\n","|    n_updates        | 105517   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 2.99e+03 |\n","|    ep_rew_mean      | 368      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2620     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2060     |\n","|    total_timesteps  | 523170   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0512   |\n","|    n_updates        | 105792   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3e+03    |\n","|    ep_rew_mean      | 370      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2624     |\n","|    fps              | 253      |\n","|    time_elapsed     | 2063     |\n","|    total_timesteps  | 523962   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0352   |\n","|    n_updates        | 105990   |\n","----------------------------------\n","Eval num_timesteps=525000, episode_reward=430.00 +/- 118.57\n","Episode length: 3479.40 +/- 763.89\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 3.48e+03 |\n","|    mean_reward      | 430      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 525000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0284   |\n","|    n_updates        | 106249   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |       
   |\n","|    ep_len_mean      | 3.01e+03 |\n","|    ep_rew_mean      | 371      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2628     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2079     |\n","|    total_timesteps  | 525273   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0263   |\n","|    n_updates        | 106318   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.02e+03 |\n","|    ep_rew_mean      | 371      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2632     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2085     |\n","|    total_timesteps  | 526692   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0138   |\n","|    n_updates        | 106672   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.04e+03 |\n","|    ep_rew_mean      | 377      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2636     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2089     |\n","|    total_timesteps  | 527835   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0881   |\n","|    n_updates        | 106958   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.04e+03 |\n","|    ep_rew_mean      | 377      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2640     |\n","|    fps              | 252      |\n","| 
   time_elapsed     | 2095     |\n","|    total_timesteps  | 529111   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0247   |\n","|    n_updates        | 107277   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.05e+03 |\n","|    ep_rew_mean      | 380      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2644     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2100     |\n","|    total_timesteps  | 530435   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0463   |\n","|    n_updates        | 107608   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.06e+03 |\n","|    ep_rew_mean      | 382      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2648     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2105     |\n","|    total_timesteps  | 531667   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0137   |\n","|    n_updates        | 107916   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.04e+03 |\n","|    ep_rew_mean      | 377      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2652     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2111     |\n","|    total_timesteps  | 533041   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0133   |\n","|    n_updates        | 108260   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.05e+03 |\n","|    ep_rew_mean      | 378      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2656     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2115     |\n","|    total_timesteps  | 534137   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0234   |\n","|    n_updates        | 108534   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.05e+03 |\n","|    ep_rew_mean      | 377      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2660     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2119     |\n","|    total_timesteps  | 535101   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0122   |\n","|    n_updates        | 108775   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.05e+03 |\n","|    ep_rew_mean      | 379      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2664     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2124     |\n","|    total_timesteps  | 536139   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0282   |\n","|    n_updates        | 109034   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.04e+03 |\n","|    ep_rew_mean      | 376      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 2668     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2127     |\n","|    total_timesteps  | 537007   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.025    |\n","|    n_updates        | 109251   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.04e+03 |\n","|    ep_rew_mean      | 379      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2672     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2133     |\n","|    total_timesteps  | 538511   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0569   |\n","|    n_updates        | 109627   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.05e+03 |\n","|    ep_rew_mean      | 382      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2676     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2136     |\n","|    total_timesteps  | 539141   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00827  |\n","|    n_updates        | 109785   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.02e+03 |\n","|    ep_rew_mean      | 376      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2680     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2139     |\n","|    total_timesteps  | 539981   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0265   |\n","|    n_updates        | 109995   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.03e+03 |\n","|    ep_rew_mean      | 379      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2684     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2145     |\n","|    total_timesteps  | 541286   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0467   |\n","|    n_updates        | 110321   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.07e+03 |\n","|    ep_rew_mean      | 387      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2688     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2152     |\n","|    total_timesteps  | 543180   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0222   |\n","|    n_updates        | 110794   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.07e+03 |\n","|    ep_rew_mean      | 384      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2692     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2156     |\n","|    total_timesteps  | 544149   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 111037   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 3.09e+03 |\n","|    ep_rew_mean      | 388      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2696     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2164     |\n","|    total_timesteps  | 546057   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0163   |\n","|    n_updates        | 111514   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.12e+03 |\n","|    ep_rew_mean      | 393      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2700     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2171     |\n","|    total_timesteps  | 547897   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.05     |\n","|    n_updates        | 111974   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.13e+03 |\n","|    ep_rew_mean      | 397      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2704     |\n","|    fps              | 252      |\n","|    time_elapsed     | 2176     |\n","|    total_timesteps  | 548981   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0233   |\n","|    n_updates        | 112245   |\n","----------------------------------\n","Eval num_timesteps=550000, episode_reward=492.00 +/- 235.45\n","Episode length: 4043.20 +/- 1726.44\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.04e+03 |\n","|    mean_reward      | 492      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01 
    |\n","| time/               |          |\n","|    total_timesteps  | 550000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0169   |\n","|    n_updates        | 112499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.16e+03 |\n","|    ep_rew_mean      | 400      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2708     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2193     |\n","|    total_timesteps  | 550490   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0357   |\n","|    n_updates        | 112622   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.17e+03 |\n","|    ep_rew_mean      | 404      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2712     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2197     |\n","|    total_timesteps  | 551305   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0193   |\n","|    n_updates        | 112826   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.18e+03 |\n","|    ep_rew_mean      | 407      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2716     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2203     |\n","|    total_timesteps  | 552842   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0302   |\n","|    n_updates        | 113210   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.2e+03  |\n","|    ep_rew_mean      | 410      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2720     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2207     |\n","|    total_timesteps  | 553871   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0118   |\n","|    n_updates        | 113467   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.21e+03 |\n","|    ep_rew_mean      | 412      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2724     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2211     |\n","|    total_timesteps  | 554791   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0117   |\n","|    n_updates        | 113697   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.22e+03 |\n","|    ep_rew_mean      | 416      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2728     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2217     |\n","|    total_timesteps  | 556161   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0135   |\n","|    n_updates        | 114040   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.23e+03 |\n","|    ep_rew_mean      | 416      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 2732     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2222     |\n","|    total_timesteps  | 557507   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0237   |\n","|    n_updates        | 114376   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.23e+03 |\n","|    ep_rew_mean      | 419      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2736     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2229     |\n","|    total_timesteps  | 559166   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0167   |\n","|    n_updates        | 114791   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.24e+03 |\n","|    ep_rew_mean      | 420      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2740     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2233     |\n","|    total_timesteps  | 560218   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0171   |\n","|    n_updates        | 115054   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.25e+03 |\n","|    ep_rew_mean      | 422      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2744     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2237     |\n","|    total_timesteps  | 561216   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.00583  |\n","|    n_updates        | 115303   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.25e+03 |\n","|    ep_rew_mean      | 421      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2748     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2242     |\n","|    total_timesteps  | 562157   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0293   |\n","|    n_updates        | 115539   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.26e+03 |\n","|    ep_rew_mean      | 418      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2752     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2254     |\n","|    total_timesteps  | 565085   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 116271   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.33e+03 |\n","|    ep_rew_mean      | 424      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2756     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2259     |\n","|    total_timesteps  | 566373   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0162   |\n","|    n_updates        | 116593   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 3.33e+03 |\n","|    ep_rew_mean      | 425      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2760     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2263     |\n","|    total_timesteps  | 567494   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0151   |\n","|    n_updates        | 116873   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.33e+03 |\n","|    ep_rew_mean      | 425      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2764     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2269     |\n","|    total_timesteps  | 568809   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0133   |\n","|    n_updates        | 117202   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.35e+03 |\n","|    ep_rew_mean      | 430      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2768     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2273     |\n","|    total_timesteps  | 569830   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0314   |\n","|    n_updates        | 117457   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.33e+03 |\n","|    ep_rew_mean      | 432      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2772     |\n","|    fps              | 250      |\n","|    
time_elapsed     | 2282     |\n","|    total_timesteps  | 572194   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0212   |\n","|    n_updates        | 118048   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.34e+03 |\n","|    ep_rew_mean      | 436      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2776     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2286     |\n","|    total_timesteps  | 573205   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0174   |\n","|    n_updates        | 118301   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.34e+03 |\n","|    ep_rew_mean      | 436      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2780     |\n","|    fps              | 250      |\n","|    time_elapsed     | 2291     |\n","|    total_timesteps  | 574391   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.097    |\n","|    n_updates        | 118597   |\n","----------------------------------\n","Eval num_timesteps=575000, episode_reward=340.00 +/- 40.50\n","Episode length: 2673.60 +/- 255.43\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 2.67e+03 |\n","|    mean_reward      | 340      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 575000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0154   |\n","|    n_updates        | 118749 
  |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.37e+03 |\n","|    ep_rew_mean      | 440      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2784     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2302     |\n","|    total_timesteps  | 575270   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0153   |\n","|    n_updates        | 118817   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.4e+03  |\n","|    ep_rew_mean      | 446      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2788     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2309     |\n","|    total_timesteps  | 576931   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.11     |\n","|    n_updates        | 119232   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.38e+03 |\n","|    ep_rew_mean      | 443      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2792     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2316     |\n","|    total_timesteps  | 578588   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.029    |\n","|    n_updates        | 119646   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.41e+03 |\n","|    ep_rew_mean      | 447      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 2796     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2324     |\n","|    total_timesteps  | 580787   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0364   |\n","|    n_updates        | 120196   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.43e+03 |\n","|    ep_rew_mean      | 454      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2800     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2329     |\n","|    total_timesteps  | 581943   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0293   |\n","|    n_updates        | 120485   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.45e+03 |\n","|    ep_rew_mean      | 455      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2804     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2341     |\n","|    total_timesteps  | 584902   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.028    |\n","|    n_updates        | 121225   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.49e+03 |\n","|    ep_rew_mean      | 460      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2808     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2345     |\n","|    total_timesteps  | 586118   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0175   |\n","|    n_updates        | 121529   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.5e+03  |\n","|    ep_rew_mean      | 460      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2812     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2352     |\n","|    total_timesteps  | 587667   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0452   |\n","|    n_updates        | 121916   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.5e+03  |\n","|    ep_rew_mean      | 460      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2816     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2356     |\n","|    total_timesteps  | 588842   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0314   |\n","|    n_updates        | 122210   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.55e+03 |\n","|    ep_rew_mean      | 467      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2820     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2363     |\n","|    total_timesteps  | 590609   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0144   |\n","|    n_updates        | 122652   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 3.56e+03 |\n","|    ep_rew_mean      | 470      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2824     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2368     |\n","|    total_timesteps  | 591694   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.13     |\n","|    n_updates        | 122923   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.6e+03  |\n","|    ep_rew_mean      | 474      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2828     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2376     |\n","|    total_timesteps  | 593682   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0197   |\n","|    n_updates        | 123420   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.62e+03 |\n","|    ep_rew_mean      | 478      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2832     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2388     |\n","|    total_timesteps  | 596695   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0225   |\n","|    n_updates        | 124173   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.66e+03 |\n","|    ep_rew_mean      | 485      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2836     |\n","|    fps              | 249      |\n","|    
time_elapsed     | 2393     |\n","|    total_timesteps  | 598089   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0176   |\n","|    n_updates        | 124522   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.71e+03 |\n","|    ep_rew_mean      | 494      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2840     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2399     |\n","|    total_timesteps  | 599779   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0249   |\n","|    n_updates        | 124944   |\n","----------------------------------\n","Eval num_timesteps=600000, episode_reward=353.00 +/- 136.55\n","Episode length: 2427.20 +/- 634.37\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 2.43e+03 |\n","|    mean_reward      | 353      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 600000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0197   |\n","|    n_updates        | 124999   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.72e+03 |\n","|    ep_rew_mean      | 495      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2844     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2414     |\n","|    total_timesteps  | 601559   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0209   |\n","|    n_updates        | 
125389   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.73e+03 |\n","|    ep_rew_mean      | 498      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2848     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2418     |\n","|    total_timesteps  | 602519   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0205   |\n","|    n_updates        | 125629   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.75e+03 |\n","|    ep_rew_mean      | 503      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2852     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2425     |\n","|    total_timesteps  | 604395   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0122   |\n","|    n_updates        | 126098   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.78e+03 |\n","|    ep_rew_mean      | 506      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2856     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2431     |\n","|    total_timesteps  | 605999   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0392   |\n","|    n_updates        | 126499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.79e+03 |\n","|    ep_rew_mean      | 514      |\n","|    exploration_rate | 0.01     
|\n","| time/               |          |\n","|    episodes         | 2860     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2439     |\n","|    total_timesteps  | 607920   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0414   |\n","|    n_updates        | 126979   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.81e+03 |\n","|    ep_rew_mean      | 517      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2864     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2445     |\n","|    total_timesteps  | 609371   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0219   |\n","|    n_updates        | 127342   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.82e+03 |\n","|    ep_rew_mean      | 517      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2868     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2449     |\n","|    total_timesteps  | 610480   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00944  |\n","|    n_updates        | 127619   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.82e+03 |\n","|    ep_rew_mean      | 521      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2872     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2456     |\n","|    total_timesteps  | 612208   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.016    |\n","|    n_updates        | 128051   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.82e+03 |\n","|    ep_rew_mean      | 522      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2876     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2462     |\n","|    total_timesteps  | 613548   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0268   |\n","|    n_updates        | 128386   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.85e+03 |\n","|    ep_rew_mean      | 524      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2880     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2469     |\n","|    total_timesteps  | 615312   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0244   |\n","|    n_updates        | 128827   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.87e+03 |\n","|    ep_rew_mean      | 525      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2884     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2477     |\n","|    total_timesteps  | 617261   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0365   |\n","|    n_updates        | 129315   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 528      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2888     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2481     |\n","|    total_timesteps  | 618178   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0139   |\n","|    n_updates        | 129544   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 528      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2892     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2485     |\n","|    total_timesteps  | 619349   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0205   |\n","|    n_updates        | 129837   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.9e+03  |\n","|    ep_rew_mean      | 531      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2896     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2490     |\n","|    total_timesteps  | 620479   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0108   |\n","|    n_updates        | 130119   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 529      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2900     |\n","|    fps              | 249      |\n","|    
time_elapsed     | 2495     |\n","|    total_timesteps  | 621666   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0827   |\n","|    n_updates        | 130416   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.9e+03  |\n","|    ep_rew_mean      | 531      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2904     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2501     |\n","|    total_timesteps  | 623259   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0249   |\n","|    n_updates        | 130814   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 528      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2908     |\n","|    fps              | 249      |\n","|    time_elapsed     | 2507     |\n","|    total_timesteps  | 624613   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0166   |\n","|    n_updates        | 131153   |\n","----------------------------------\n","Eval num_timesteps=625000, episode_reward=723.00 +/- 289.35\n","Episode length: 4884.20 +/- 1192.20\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.88e+03 |\n","|    mean_reward      | 723      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 625000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0235   |\n","|    n_updates        | 
131249   |\n","----------------------------------\n","New best mean reward!\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 526      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2912     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2525     |\n","|    total_timesteps  | 625653   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0108   |\n","|    n_updates        | 131413   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 527      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2916     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2532     |\n","|    total_timesteps  | 627396   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0258   |\n","|    n_updates        | 131848   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 528      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2920     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2538     |\n","|    total_timesteps  | 628901   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0133   |\n","|    n_updates        | 132225   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 531      |\n","|    
exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2924     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2545     |\n","|    total_timesteps  | 630481   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00973  |\n","|    n_updates        | 132620   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 537      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2928     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2551     |\n","|    total_timesteps  | 632043   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0268   |\n","|    n_updates        | 133010   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 536      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2932     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2556     |\n","|    total_timesteps  | 633348   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0116   |\n","|    n_updates        | 133336   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 544      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2936     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2562     |\n","|    total_timesteps  | 634774   |\n","| train/        
      |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0158   |\n","|    n_updates        | 133693   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.96e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2940     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2568     |\n","|    total_timesteps  | 636181   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0167   |\n","|    n_updates        | 134045   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2944     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2575     |\n","|    total_timesteps  | 638228   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0103   |\n","|    n_updates        | 134556   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.96e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2948     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2582     |\n","|    total_timesteps  | 639748   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0363   |\n","|    n_updates        | 134936   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |    
      |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 540      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2952     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2585     |\n","|    total_timesteps  | 640540   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0188   |\n","|    n_updates        | 135134   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2956     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2590     |\n","|    total_timesteps  | 641736   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0383   |\n","|    n_updates        | 135433   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 538      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2960     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2597     |\n","|    total_timesteps  | 643571   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0209   |\n","|    n_updates        | 135892   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 539      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2964     |\n","|    fps              | 247      
|\n","|    time_elapsed     | 2605     |\n","|    total_timesteps  | 645596   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0468   |\n","|    n_updates        | 136398   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 544      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2968     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2611     |\n","|    total_timesteps  | 647143   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0285   |\n","|    n_updates        | 136785   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 541      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2972     |\n","|    fps              | 247      |\n","|    time_elapsed     | 2617     |\n","|    total_timesteps  | 648568   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00841  |\n","|    n_updates        | 137141   |\n","----------------------------------\n","Eval num_timesteps=650000, episode_reward=538.00 +/- 137.43\n","Episode length: 3883.00 +/- 491.66\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 3.88e+03 |\n","|    mean_reward      | 538      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 650000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0481   |\n","|    n_updates     
   | 137499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 544      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2976     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2634     |\n","|    total_timesteps  | 650086   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00674  |\n","|    n_updates        | 137521   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 544      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2980     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2639     |\n","|    total_timesteps  | 651309   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0175   |\n","|    n_updates        | 137827   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2984     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2641     |\n","|    total_timesteps  | 651875   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0372   |\n","|    n_updates        | 137968   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 545      |\n","|    exploration_rate | 0.01  
   |\n","| time/               |          |\n","|    episodes         | 2988     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2646     |\n","|    total_timesteps  | 653196   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0185   |\n","|    n_updates        | 138298   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2992     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2653     |\n","|    total_timesteps  | 654745   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0197   |\n","|    n_updates        | 138686   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 2996     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2657     |\n","|    total_timesteps  | 655848   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.021    |\n","|    n_updates        | 138961   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 549      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3000     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2662     |\n","|    total_timesteps  | 657058   |\n","| train/              |          |\n","| 
   learning_rate    | 0.0001   |\n","|    loss             | 0.024    |\n","|    n_updates        | 139264   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 549      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3004     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2668     |\n","|    total_timesteps  | 658475   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0303   |\n","|    n_updates        | 139618   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 551      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3008     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2673     |\n","|    total_timesteps  | 659938   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00681  |\n","|    n_updates        | 139984   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 552      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3012     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2679     |\n","|    total_timesteps  | 661162   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0103   |\n","|    n_updates        | 140290   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 553      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3016     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2684     |\n","|    total_timesteps  | 662643   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0242   |\n","|    n_updates        | 140660   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 550      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3020     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2688     |\n","|    total_timesteps  | 663644   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0145   |\n","|    n_updates        | 140910   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.94e+03 |\n","|    ep_rew_mean      | 552      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3024     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2694     |\n","|    total_timesteps  | 665073   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0152   |\n","|    n_updates        | 141268   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 553      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3028     |\n","|    fps              | 246      |\n","|    
time_elapsed     | 2699     |\n","|    total_timesteps  | 666238   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0335   |\n","|    n_updates        | 141559   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 544      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3032     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2703     |\n","|    total_timesteps  | 667117   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0346   |\n","|    n_updates        | 141779   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 550      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3036     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2708     |\n","|    total_timesteps  | 668512   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.014    |\n","|    n_updates        | 142127   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 555      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3040     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2720     |\n","|    total_timesteps  | 671428   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0391   |\n","|    n_updates        | 142856   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 556      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3044     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2725     |\n","|    total_timesteps  | 672642   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0508   |\n","|    n_updates        | 143160   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 560      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3048     |\n","|    fps              | 246      |\n","|    time_elapsed     | 2732     |\n","|    total_timesteps  | 674048   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.01     |\n","|    n_updates        | 143511   |\n","----------------------------------\n","Eval num_timesteps=675000, episode_reward=684.00 +/- 133.13\n","Episode length: 4844.60 +/- 1307.13\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.84e+03 |\n","|    mean_reward      | 684      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 675000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0187   |\n","|    n_updates        | 143749   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 558      |\n","|    
exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3052     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2752     |\n","|    total_timesteps  | 675997   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0408   |\n","|    n_updates        | 143999   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 555      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3056     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2758     |\n","|    total_timesteps  | 677409   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0402   |\n","|    n_updates        | 144352   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 556      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3060     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2761     |\n","|    total_timesteps  | 678212   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0775   |\n","|    n_updates        | 144552   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 554      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3064     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2770     |\n","|    total_timesteps  | 680294   |\n","| train/        
      |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00943  |\n","|    n_updates        | 145073   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 550      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3068     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2775     |\n","|    total_timesteps  | 681600   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00632  |\n","|    n_updates        | 145399   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 549      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3072     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2780     |\n","|    total_timesteps  | 682753   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0301   |\n","|    n_updates        | 145688   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.96e+03 |\n","|    ep_rew_mean      | 547      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3076     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2783     |\n","|    total_timesteps  | 683588   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00575  |\n","|    n_updates        | 145896   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |    
      |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 545      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3080     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2790     |\n","|    total_timesteps  | 685278   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00993  |\n","|    n_updates        | 146319   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 547      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3084     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2799     |\n","|    total_timesteps  | 687727   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0194   |\n","|    n_updates        | 146931   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.94e+03 |\n","|    ep_rew_mean      | 547      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3088     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2807     |\n","|    total_timesteps  | 689640   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.014    |\n","|    n_updates        | 147409   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3092     |\n","|    fps              | 245      
|\n","|    time_elapsed     | 2810     |\n","|    total_timesteps  | 690460   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0437   |\n","|    n_updates        | 147614   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 539      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3096     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2817     |\n","|    total_timesteps  | 692225   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 148056   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.83e+03 |\n","|    ep_rew_mean      | 531      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3100     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2825     |\n","|    total_timesteps  | 694230   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0294   |\n","|    n_updates        | 148557   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.84e+03 |\n","|    ep_rew_mean      | 534      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3104     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2829     |\n","|    total_timesteps  | 695205   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0437   |\n","|    n_updates        | 148801   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.85e+03 |\n","|    ep_rew_mean      | 538      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3108     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2836     |\n","|    total_timesteps  | 696969   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0735   |\n","|    n_updates        | 149242   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.86e+03 |\n","|    ep_rew_mean      | 542      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3112     |\n","|    fps              | 245      |\n","|    time_elapsed     | 2842     |\n","|    total_timesteps  | 698432   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0814   |\n","|    n_updates        | 149607   |\n","----------------------------------\n","Eval num_timesteps=700000, episode_reward=586.00 +/- 59.28\n","Episode length: 4325.80 +/- 662.72\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.33e+03 |\n","|    mean_reward      | 586      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 700000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0493   |\n","|    n_updates        | 149999   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.84e+03 |\n","|    ep_rew_mean      | 537      |\n","|    
exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3116     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2860     |\n","|    total_timesteps  | 700073   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0312   |\n","|    n_updates        | 150018   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.86e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3120     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2866     |\n","|    total_timesteps  | 701480   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0123   |\n","|    n_updates        | 150369   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.86e+03 |\n","|    ep_rew_mean      | 545      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3124     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2872     |\n","|    total_timesteps  | 703094   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0804   |\n","|    n_updates        | 150773   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.86e+03 |\n","|    ep_rew_mean      | 543      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3128     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2877     |\n","|    total_timesteps  | 704190   |\n","| train/        
      |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0935   |\n","|    n_updates        | 151047   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3132     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2887     |\n","|    total_timesteps  | 706766   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0505   |\n","|    n_updates        | 151691   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 546      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3136     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2898     |\n","|    total_timesteps  | 709592   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00818  |\n","|    n_updates        | 152397   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 553      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3140     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2907     |\n","|    total_timesteps  | 711726   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0425   |\n","|    n_updates        | 152931   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |    
      |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 558      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3144     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2914     |\n","|    total_timesteps  | 713647   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0287   |\n","|    n_updates        | 153411   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.89e+03 |\n","|    ep_rew_mean      | 554      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3148     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2920     |\n","|    total_timesteps  | 714944   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00862  |\n","|    n_updates        | 153735   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 559      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3152     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2927     |\n","|    total_timesteps  | 716917   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0322   |\n","|    n_updates        | 154229   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.88e+03 |\n","|    ep_rew_mean      | 564      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3156     |\n","|    fps              | 244      
|\n","|    time_elapsed     | 2941     |\n","|    total_timesteps  | 720219   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0833   |\n","|    n_updates        | 155054   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 572      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3160     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2947     |\n","|    total_timesteps  | 721611   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0287   |\n","|    n_updates        | 155402   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 573      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3164     |\n","|    fps              | 244      |\n","|    time_elapsed     | 2952     |\n","|    total_timesteps  | 722926   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0181   |\n","|    n_updates        | 155731   |\n","----------------------------------\n","Eval num_timesteps=725000, episode_reward=489.00 +/- 127.77\n","Episode length: 3926.00 +/- 1038.94\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 3.93e+03 |\n","|    mean_reward      | 489      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 725000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0232   |\n","|    n_updates    
    | 156249   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 576      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3168     |\n","|    fps              | 243      |\n","|    time_elapsed     | 2973     |\n","|    total_timesteps  | 725173   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00967  |\n","|    n_updates        | 156293   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 575      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3172     |\n","|    fps              | 243      |\n","|    time_elapsed     | 2980     |\n","|    total_timesteps  | 726826   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0322   |\n","|    n_updates        | 156706   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.91e+03 |\n","|    ep_rew_mean      | 574      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3176     |\n","|    fps              | 243      |\n","|    time_elapsed     | 2986     |\n","|    total_timesteps  | 728422   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0226   |\n","|    n_updates        | 157105   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.92e+03 |\n","|    ep_rew_mean      | 571      |\n","|    exploration_rate | 0.01 
    |\n","| time/               |          |\n","|    episodes         | 3180     |\n","|    fps              | 243      |\n","|    time_elapsed     | 2991     |\n","|    total_timesteps  | 729682   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0232   |\n","|    n_updates        | 157420   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 573      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3184     |\n","|    fps              | 243      |\n","|    time_elapsed     | 2999     |\n","|    total_timesteps  | 731839   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0598   |\n","|    n_updates        | 157959   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 576      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3188     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3005     |\n","|    total_timesteps  | 733149   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0131   |\n","|    n_updates        | 158287   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 576      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3192     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3013     |\n","|    total_timesteps  | 735318   |\n","| train/              |          
|\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0224   |\n","|    n_updates        | 158829   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 577      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3196     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3020     |\n","|    total_timesteps  | 736871   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0563   |\n","|    n_updates        | 159217   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 581      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3200     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3028     |\n","|    total_timesteps  | 739040   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0236   |\n","|    n_updates        | 159759   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 581      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3204     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3036     |\n","|    total_timesteps  | 741177   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0414   |\n","|    n_updates        | 160294   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 581      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3208     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3043     |\n","|    total_timesteps  | 742726   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 160681   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 580      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3212     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3050     |\n","|    total_timesteps  | 744633   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0318   |\n","|    n_updates        | 161158   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 587      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3216     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3059     |\n","|    total_timesteps  | 746792   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 161697   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 587      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3220     |\n","|    fps              | 244      |\n","|    
time_elapsed     | 3065     |\n","|    total_timesteps  | 748345   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0306   |\n","|    n_updates        | 162086   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 586      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3224     |\n","|    fps              | 244      |\n","|    time_elapsed     | 3071     |\n","|    total_timesteps  | 749829   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0119   |\n","|    n_updates        | 162457   |\n","----------------------------------\n","Eval num_timesteps=750000, episode_reward=681.00 +/- 150.41\n","Episode length: 5224.80 +/- 1098.07\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 5.22e+03 |\n","|    mean_reward      | 681      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 750000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0138   |\n","|    n_updates        | 162499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 595      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3228     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3091     |\n","|    total_timesteps  | 751350   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0263   |\n","|    n_updates        | 
162837   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 598      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3232     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3099     |\n","|    total_timesteps  | 753191   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0133   |\n","|    n_updates        | 163297   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 601      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3236     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3107     |\n","|    total_timesteps  | 755109   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0159   |\n","|    n_updates        | 163777   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.05e+03 |\n","|    ep_rew_mean      | 600      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3240     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3114     |\n","|    total_timesteps  | 757015   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0163   |\n","|    n_updates        | 164253   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 602      |\n","|    exploration_rate | 0.01     
|\n","| time/               |          |\n","|    episodes         | 3244     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3119     |\n","|    total_timesteps  | 758223   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0436   |\n","|    n_updates        | 164555   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 603      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3248     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3127     |\n","|    total_timesteps  | 760238   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0436   |\n","|    n_updates        | 165059   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.05e+03 |\n","|    ep_rew_mean      | 605      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3252     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3134     |\n","|    total_timesteps  | 761857   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.093    |\n","|    n_updates        | 165464   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 604      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3256     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3141     |\n","|    total_timesteps  | 763761   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0145   |\n","|    n_updates        | 165940   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3260     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3149     |\n","|    total_timesteps  | 765639   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0132   |\n","|    n_updates        | 166409   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3264     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3153     |\n","|    total_timesteps  | 766845   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0369   |\n","|    n_updates        | 166711   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 612      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3268     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3159     |\n","|    total_timesteps  | 768079   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0692   |\n","|    n_updates        | 167019   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4.11e+03 |\n","|    ep_rew_mean      | 615      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3272     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3163     |\n","|    total_timesteps  | 769283   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0276   |\n","|    n_updates        | 167320   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3276     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3171     |\n","|    total_timesteps  | 771038   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0223   |\n","|    n_updates        | 167759   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.05e+03 |\n","|    ep_rew_mean      | 608      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3280     |\n","|    fps              | 243      |\n","|    time_elapsed     | 3178     |\n","|    total_timesteps  | 772671   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0231   |\n","|    n_updates        | 168167   |\n","----------------------------------\n","Eval num_timesteps=775000, episode_reward=548.00 +/- 232.37\n","Episode length: 4043.00 +/- 1484.57\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.04e+03 |\n","|    mean_reward      | 548      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01 
    |\n","| time/               |          |\n","|    total_timesteps  | 775000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.019    |\n","|    n_updates        | 168749   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 614      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3284     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3199     |\n","|    total_timesteps  | 775066   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.027    |\n","|    n_updates        | 168766   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 615      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3288     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3206     |\n","|    total_timesteps  | 776442   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0503   |\n","|    n_updates        | 169110   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 616      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3292     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3212     |\n","|    total_timesteps  | 778085   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00819  |\n","|    n_updates        | 169521   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 618      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3296     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3215     |\n","|    total_timesteps  | 778957   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00844  |\n","|    n_updates        | 169739   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 618      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3300     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3222     |\n","|    total_timesteps  | 780403   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0281   |\n","|    n_updates        | 170100   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 616      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3304     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3226     |\n","|    total_timesteps  | 781474   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0193   |\n","|    n_updates        | 170368   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.12e+03 |\n","|    ep_rew_mean      | 623      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 3308     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3232     |\n","|    total_timesteps  | 783042   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0375   |\n","|    n_updates        | 170760   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.14e+03 |\n","|    ep_rew_mean      | 627      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3312     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3239     |\n","|    total_timesteps  | 784673   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00975  |\n","|    n_updates        | 171168   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.17e+03 |\n","|    ep_rew_mean      | 632      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3316     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3245     |\n","|    total_timesteps  | 786116   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00638  |\n","|    n_updates        | 171528   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.16e+03 |\n","|    ep_rew_mean      | 633      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3320     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3250     |\n","|    total_timesteps  | 787625   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0191   |\n","|    n_updates        | 171906   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.15e+03 |\n","|    ep_rew_mean      | 635      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3324     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3255     |\n","|    total_timesteps  | 788639   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0247   |\n","|    n_updates        | 172159   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.14e+03 |\n","|    ep_rew_mean      | 634      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3328     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3260     |\n","|    total_timesteps  | 789965   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0183   |\n","|    n_updates        | 172491   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.14e+03 |\n","|    ep_rew_mean      | 633      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3332     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3265     |\n","|    total_timesteps  | 791338   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0475   |\n","|    n_updates        | 172834   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4.16e+03 |\n","|    ep_rew_mean      | 639      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3336     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3271     |\n","|    total_timesteps  | 792664   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0384   |\n","|    n_updates        | 173165   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.16e+03 |\n","|    ep_rew_mean      | 641      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3340     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3277     |\n","|    total_timesteps  | 794360   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0856   |\n","|    n_updates        | 173589   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.16e+03 |\n","|    ep_rew_mean      | 640      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3344     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3285     |\n","|    total_timesteps  | 796118   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0364   |\n","|    n_updates        | 174029   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.15e+03 |\n","|    ep_rew_mean      | 638      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3348     |\n","|    fps              | 242      |\n","|    
time_elapsed     | 3289     |\n","|    total_timesteps  | 797059   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00957  |\n","|    n_updates        | 174264   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.16e+03 |\n","|    ep_rew_mean      | 641      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3352     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3295     |\n","|    total_timesteps  | 798595   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0164   |\n","|    n_updates        | 174648   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.11e+03 |\n","|    ep_rew_mean      | 631      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3356     |\n","|    fps              | 242      |\n","|    time_elapsed     | 3301     |\n","|    total_timesteps  | 799987   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0217   |\n","|    n_updates        | 174996   |\n","----------------------------------\n","Eval num_timesteps=800000, episode_reward=657.00 +/- 252.72\n","Episode length: 4466.20 +/- 1595.76\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.47e+03 |\n","|    mean_reward      | 657      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 800000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0161   |\n","|    n_updates        | 
174999   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 628      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3360     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3322     |\n","|    total_timesteps  | 802037   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0136   |\n","|    n_updates        | 175509   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 626      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3364     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3328     |\n","|    total_timesteps  | 803727   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0211   |\n","|    n_updates        | 175931   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.13e+03 |\n","|    ep_rew_mean      | 629      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3368     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3335     |\n","|    total_timesteps  | 805415   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0342   |\n","|    n_updates        | 176353   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 624      |\n","|    exploration_rate | 0.01     
|\n","| time/               |          |\n","|    episodes         | 3372     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3343     |\n","|    total_timesteps  | 807209   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.136    |\n","|    n_updates        | 176802   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 621      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3376     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3349     |\n","|    total_timesteps  | 808739   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0267   |\n","|    n_updates        | 177184   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 623      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3380     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3355     |\n","|    total_timesteps  | 810283   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0652   |\n","|    n_updates        | 177570   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 616      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3384     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3361     |\n","|    total_timesteps  | 811638   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0258   |\n","|    n_updates        | 177909   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 609      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3388     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3370     |\n","|    total_timesteps  | 813994   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0198   |\n","|    n_updates        | 178498   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 608      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3392     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3373     |\n","|    total_timesteps  | 814641   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0272   |\n","|    n_updates        | 178660   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3396     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3382     |\n","|    total_timesteps  | 816812   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0185   |\n","|    n_updates        | 179202   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 605      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3400     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3389     |\n","|    total_timesteps  | 818857   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0185   |\n","|    n_updates        | 179714   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 605      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3404     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3394     |\n","|    total_timesteps  | 819855   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0107   |\n","|    n_updates        | 179963   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 602      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3408     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3399     |\n","|    total_timesteps  | 821227   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0169   |\n","|    n_updates        | 180306   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 599      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3412     |\n","|    fps              | 241      |\n","|    
time_elapsed     | 3407     |\n","|    total_timesteps  | 823111   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0154   |\n","|    n_updates        | 180777   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 597      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3416     |\n","|    fps              | 241      |\n","|    time_elapsed     | 3412     |\n","|    total_timesteps  | 824246   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0203   |\n","|    n_updates        | 181061   |\n","----------------------------------\n","Eval num_timesteps=825000, episode_reward=691.00 +/- 115.86\n","Episode length: 4365.80 +/- 357.58\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.37e+03 |\n","|    mean_reward      | 691      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 825000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0266   |\n","|    n_updates        | 181249   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 599      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3420     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3431     |\n","|    total_timesteps  | 825994   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.023    |\n","|    n_updates        | 
181498   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 603      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3424     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3438     |\n","|    total_timesteps  | 827933   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0126   |\n","|    n_updates        | 181983   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 597      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3428     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3445     |\n","|    total_timesteps  | 829450   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0367   |\n","|    n_updates        | 182362   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.96e+03 |\n","|    ep_rew_mean      | 593      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3432     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3448     |\n","|    total_timesteps  | 830232   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.042    |\n","|    n_updates        | 182557   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.95e+03 |\n","|    ep_rew_mean      | 596      |\n","|    exploration_rate | 0.01     
|\n","| time/               |          |\n","|    episodes         | 3436     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3457     |\n","|    total_timesteps  | 832286   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0213   |\n","|    n_updates        | 183071   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.94e+03 |\n","|    ep_rew_mean      | 596      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3440     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3462     |\n","|    total_timesteps  | 833626   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0661   |\n","|    n_updates        | 183406   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.93e+03 |\n","|    ep_rew_mean      | 595      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3444     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3472     |\n","|    total_timesteps  | 836100   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0146   |\n","|    n_updates        | 184024   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 605      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3448     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3483     |\n","|    total_timesteps  | 838950   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0226   |\n","|    n_updates        | 184737   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 609      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3452     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3491     |\n","|    total_timesteps  | 841143   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0237   |\n","|    n_updates        | 185285   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 610      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3456     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3498     |\n","|    total_timesteps  | 842627   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00735  |\n","|    n_updates        | 185656   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 609      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3460     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3509     |\n","|    total_timesteps  | 845402   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0107   |\n","|    n_updates        | 186350   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    
ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 612      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3464     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3518     |\n","|    total_timesteps  | 847735   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0162   |\n","|    n_updates        | 186933   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 608      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3468     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3525     |\n","|    total_timesteps  | 849428   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0189   |\n","|    n_updates        | 187356   |\n","----------------------------------\n","Eval num_timesteps=850000, episode_reward=712.00 +/- 222.18\n","Episode length: 4293.00 +/- 996.81\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.29e+03 |\n","|    mean_reward      | 712      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 850000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0506   |\n","|    n_updates        | 187499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 610      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3472     |\n","|    fps              | 240   
   |\n","|    time_elapsed     | 3545     |\n","|    total_timesteps  | 851450   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0353   |\n","|    n_updates        | 187862   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3476     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3548     |\n","|    total_timesteps  | 852169   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0738   |\n","|    n_updates        | 188042   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3480     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3555     |\n","|    total_timesteps  | 854045   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0388   |\n","|    n_updates        | 188511   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 605      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3484     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3560     |\n","|    total_timesteps  | 855326   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0122   |\n","|    n_updates        | 188831   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 604      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3488     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3566     |\n","|    total_timesteps  | 856870   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0174   |\n","|    n_updates        | 189217   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 599      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3492     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3570     |\n","|    total_timesteps  | 857688   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0222   |\n","|    n_updates        | 189421   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4e+03    |\n","|    ep_rew_mean      | 595      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3496     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3582     |\n","|    total_timesteps  | 860609   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0549   |\n","|    n_updates        | 190152   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 599      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 3500     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3588     |\n","|    total_timesteps  | 862224   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0132   |\n","|    n_updates        | 190555   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 594      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3504     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3595     |\n","|    total_timesteps  | 863893   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0165   |\n","|    n_updates        | 190973   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 592      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3508     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3599     |\n","|    total_timesteps  | 864944   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0519   |\n","|    n_updates        | 191235   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 595      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3512     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3610     |\n","|    total_timesteps  | 867707   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0316   |\n","|    n_updates        | 191926   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 596      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3516     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3615     |\n","|    total_timesteps  | 869166   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.019    |\n","|    n_updates        | 192291   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 610      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3520     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3626     |\n","|    total_timesteps  | 871767   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.054    |\n","|    n_updates        | 192941   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 609      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3524     |\n","|    fps              | 240      |\n","|    time_elapsed     | 3633     |\n","|    total_timesteps  | 873405   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0138   |\n","|    n_updates        | 193351   |\n","----------------------------------\n","Eval num_timesteps=875000, episode_reward=468.00 +/- 70.33\n","Episode length: 4099.20 +/- 
585.87\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.1e+03  |\n","|    mean_reward      | 468      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 875000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0293   |\n","|    n_updates        | 193749   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3528     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3654     |\n","|    total_timesteps  | 875825   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0113   |\n","|    n_updates        | 193956   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3532     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3659     |\n","|    total_timesteps  | 877069   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0272   |\n","|    n_updates        | 194267   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3536     |\n","|    fps              | 239      
|\n","|    time_elapsed     | 3667     |\n","|    total_timesteps  | 879063   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0115   |\n","|    n_updates        | 194765   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.11e+03 |\n","|    ep_rew_mean      | 618      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3540     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3683     |\n","|    total_timesteps  | 882976   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0171   |\n","|    n_updates        | 195743   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 610      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3544     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3693     |\n","|    total_timesteps  | 885287   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0118   |\n","|    n_updates        | 196321   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3548     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3699     |\n","|    total_timesteps  | 886845   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0201   |\n","|    n_updates        | 196711   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 610      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3552     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3702     |\n","|    total_timesteps  | 887796   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.023    |\n","|    n_updates        | 196948   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 612      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3556     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3708     |\n","|    total_timesteps  | 889154   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00807  |\n","|    n_updates        | 197288   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 612      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3560     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3714     |\n","|    total_timesteps  | 890693   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0205   |\n","|    n_updates        | 197673   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 616      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 3564     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3721     |\n","|    total_timesteps  | 892254   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0189   |\n","|    n_updates        | 198063   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 618      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3568     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3725     |\n","|    total_timesteps  | 893418   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0174   |\n","|    n_updates        | 198354   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 613      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3572     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3731     |\n","|    total_timesteps  | 894741   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0195   |\n","|    n_updates        | 198685   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 613      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3576     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3738     |\n","|    total_timesteps  | 896598   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.125    |\n","|    n_updates        | 199149   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 608      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3580     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3744     |\n","|    total_timesteps  | 898012   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0132   |\n","|    n_updates        | 199502   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 608      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3584     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3751     |\n","|    total_timesteps  | 899801   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00976  |\n","|    n_updates        | 199950   |\n","----------------------------------\n","Eval num_timesteps=900000, episode_reward=647.00 +/- 104.43\n","Episode length: 4145.80 +/- 554.99\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.15e+03 |\n","|    mean_reward      | 647      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 900000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0142   |\n","|    n_updates        | 199999   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |       
   |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3588     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3768     |\n","|    total_timesteps  | 901153   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0247   |\n","|    n_updates        | 200288   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.09e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3592     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3772     |\n","|    total_timesteps  | 902086   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0134   |\n","|    n_updates        | 200521   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.07e+03 |\n","|    ep_rew_mean      | 602      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3596     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3777     |\n","|    total_timesteps  | 903484   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00956  |\n","|    n_updates        | 200870   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 598      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3600     |\n","|    fps              | 239      |\n","| 
   time_elapsed     | 3784     |\n","|    total_timesteps  | 905031   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0129   |\n","|    n_updates        | 201257   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 606      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3604     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3793     |\n","|    total_timesteps  | 907534   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0153   |\n","|    n_updates        | 201883   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 607      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3608     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3802     |\n","|    total_timesteps  | 909784   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0233   |\n","|    n_updates        | 202445   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.06e+03 |\n","|    ep_rew_mean      | 617      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3612     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3812     |\n","|    total_timesteps  | 912373   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0124   |\n","|    n_updates        | 203093   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.12e+03 |\n","|    ep_rew_mean      | 626      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3616     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3822     |\n","|    total_timesteps  | 914593   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0277   |\n","|    n_updates        | 203648   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.11e+03 |\n","|    ep_rew_mean      | 625      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3620     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3831     |\n","|    total_timesteps  | 916908   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0254   |\n","|    n_updates        | 204226   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.17e+03 |\n","|    ep_rew_mean      | 634      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3624     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3836     |\n","|    total_timesteps  | 918250   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0114   |\n","|    n_updates        | 204562   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.18e+03 |\n","|    ep_rew_mean      | 635      |\n","|    exploration_rate | 0.01     |\n","| 
time/               |          |\n","|    episodes         | 3628     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3841     |\n","|    total_timesteps  | 919552   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0188   |\n","|    n_updates        | 204887   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.19e+03 |\n","|    ep_rew_mean      | 637      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3632     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3851     |\n","|    total_timesteps  | 922050   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0388   |\n","|    n_updates        | 205512   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.2e+03  |\n","|    ep_rew_mean      | 640      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3636     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3858     |\n","|    total_timesteps  | 923695   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0418   |\n","|    n_updates        | 205923   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.18e+03 |\n","|    ep_rew_mean      | 636      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3640     |\n","|    fps              | 239      |\n","|    time_elapsed     | 3862     |\n","|    total_timesteps  | 924770   |\n","| train/              |          |\n","|    
learning_rate    | 0.0001   |\n","|    loss             | 0.0213   |\n","|    n_updates        | 206192   |\n","----------------------------------\n","Eval num_timesteps=925000, episode_reward=838.00 +/- 302.86\n","Episode length: 5350.20 +/- 1324.31\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 5.35e+03 |\n","|    mean_reward      | 838      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 925000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0133   |\n","|    n_updates        | 206249   |\n","----------------------------------\n","New best mean reward!\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.2e+03  |\n","|    ep_rew_mean      | 640      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3644     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3883     |\n","|    total_timesteps  | 926094   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0319   |\n","|    n_updates        | 206523   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.23e+03 |\n","|    ep_rew_mean      | 652      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3648     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3896     |\n","|    total_timesteps  | 929384   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00733  |\n","|    n_updates        | 207345   |\n","----------------------------------\n","----------------------------------\n","| 
rollout/            |          |\n","|    ep_len_mean      | 4.22e+03 |\n","|    ep_rew_mean      | 648      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3652     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3900     |\n","|    total_timesteps  | 930461   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0299   |\n","|    n_updates        | 207615   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.21e+03 |\n","|    ep_rew_mean      | 646      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3656     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3911     |\n","|    total_timesteps  | 932900   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0122   |\n","|    n_updates        | 208224   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.27e+03 |\n","|    ep_rew_mean      | 652      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3660     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3918     |\n","|    total_timesteps  | 934662   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0167   |\n","|    n_updates        | 208665   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.29e+03 |\n","|    ep_rew_mean      | 657      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3664     |\n","|    fps     
         | 238      |\n","|    time_elapsed     | 3926     |\n","|    total_timesteps  | 936786   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0537   |\n","|    n_updates        | 209196   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.29e+03 |\n","|    ep_rew_mean      | 652      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3668     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3934     |\n","|    total_timesteps  | 938480   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0208   |\n","|    n_updates        | 209619   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.29e+03 |\n","|    ep_rew_mean      | 655      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3672     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3939     |\n","|    total_timesteps  | 939806   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0129   |\n","|    n_updates        | 209951   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.28e+03 |\n","|    ep_rew_mean      | 660      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3676     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3946     |\n","|    total_timesteps  | 941589   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00926  |\n","|    n_updates        | 
210397   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.25e+03 |\n","|    ep_rew_mean      | 658      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3680     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3950     |\n","|    total_timesteps  | 942698   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0176   |\n","|    n_updates        | 210674   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.2e+03  |\n","|    ep_rew_mean      | 649      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3684     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3961     |\n","|    total_timesteps  | 945368   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0196   |\n","|    n_updates        | 211341   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.19e+03 |\n","|    ep_rew_mean      | 649      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3688     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3969     |\n","|    total_timesteps  | 947291   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0485   |\n","|    n_updates        | 211822   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.19e+03 |\n","|    ep_rew_mean      | 649      |\n","|    exploration_rate | 0.01     
|\n","| time/               |          |\n","|    episodes         | 3692     |\n","|    fps              | 238      |\n","|    time_elapsed     | 3977     |\n","|    total_timesteps  | 949315   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0173   |\n","|    n_updates        | 212328   |\n","----------------------------------\n","Eval num_timesteps=950000, episode_reward=810.00 +/- 405.36\n","Episode length: 4782.40 +/- 1120.77\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.78e+03 |\n","|    mean_reward      | 810      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 950000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0292   |\n","|    n_updates        | 212499   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.17e+03 |\n","|    ep_rew_mean      | 645      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3696     |\n","|    fps              | 237      |\n","|    time_elapsed     | 3997     |\n","|    total_timesteps  | 950988   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0165   |\n","|    n_updates        | 212746   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.13e+03 |\n","|    ep_rew_mean      | 646      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3700     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4009     |\n","|    total_timesteps  | 953836   |\n","| train/          
    |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0135   |\n","|    n_updates        | 213458   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.14e+03 |\n","|    ep_rew_mean      | 648      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3704     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4014     |\n","|    total_timesteps  | 955128   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0194   |\n","|    n_updates        | 213781   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.11e+03 |\n","|    ep_rew_mean      | 647      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3708     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4020     |\n","|    total_timesteps  | 956522   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0521   |\n","|    n_updates        | 214130   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.1e+03  |\n","|    ep_rew_mean      | 643      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3712     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4026     |\n","|    total_timesteps  | 958123   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0443   |\n","|    n_updates        | 214530   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |      
    |\n","|    ep_len_mean      | 4.08e+03 |\n","|    ep_rew_mean      | 648      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3716     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4035     |\n","|    total_timesteps  | 960402   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0183   |\n","|    n_updates        | 215100   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 644      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3720     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4044     |\n","|    total_timesteps  | 962559   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0888   |\n","|    n_updates        | 215639   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.04e+03 |\n","|    ep_rew_mean      | 650      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3724     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4053     |\n","|    total_timesteps  | 964982   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0175   |\n","|    n_updates        | 216245   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 647      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3728     |\n","|    fps              | 238      
|\n","|    time_elapsed     | 4059     |\n","|    total_timesteps  | 966237   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0129   |\n","|    n_updates        | 216559   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 643      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3732     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4064     |\n","|    total_timesteps  | 967595   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.025    |\n","|    n_updates        | 216898   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.96e+03 |\n","|    ep_rew_mean      | 638      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3736     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4068     |\n","|    total_timesteps  | 968613   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0273   |\n","|    n_updates        | 217153   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 638      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3740     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4077     |\n","|    total_timesteps  | 970741   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00721  |\n","|    n_updates        | 217685   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 639      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3744     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4082     |\n","|    total_timesteps  | 972109   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0856   |\n","|    n_updates        | 218027   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 640      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3748     |\n","|    fps              | 238      |\n","|    time_elapsed     | 4091     |\n","|    total_timesteps  | 974231   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0242   |\n","|    n_updates        | 218557   |\n","----------------------------------\n","Eval num_timesteps=975000, episode_reward=598.00 +/- 123.92\n","Episode length: 4229.00 +/- 267.55\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 4.23e+03 |\n","|    mean_reward      | 598      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 975000   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0162   |\n","|    n_updates        | 218749   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 639      |\n","|    
exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3752     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4109     |\n","|    total_timesteps  | 975702   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0118   |\n","|    n_updates        | 218925   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 639      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3756     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4116     |\n","|    total_timesteps  | 977529   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0199   |\n","|    n_updates        | 219382   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 640      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3760     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4121     |\n","|    total_timesteps  | 978602   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0945   |\n","|    n_updates        | 219650   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 639      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3764     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4126     |\n","|    total_timesteps  | 979931   |\n","| train/        
      |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.014    |\n","|    n_updates        | 219982   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.97e+03 |\n","|    ep_rew_mean      | 639      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3768     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4138     |\n","|    total_timesteps  | 982790   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0127   |\n","|    n_updates        | 220697   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.99e+03 |\n","|    ep_rew_mean      | 645      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3772     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4143     |\n","|    total_timesteps  | 983990   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0215   |\n","|    n_updates        | 220997   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 646      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3776     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4151     |\n","|    total_timesteps  | 985987   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0159   |\n","|    n_updates        | 221496   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |    
      |\n","|    ep_len_mean      | 3.98e+03 |\n","|    ep_rew_mean      | 644      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3780     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4159     |\n","|    total_timesteps  | 987854   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0193   |\n","|    n_updates        | 221963   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 654      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3784     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4164     |\n","|    total_timesteps  | 989271   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0124   |\n","|    n_updates        | 222317   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 655      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3788     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4170     |\n","|    total_timesteps  | 990627   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0372   |\n","|    n_updates        | 222656   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 649      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3792     |\n","|    fps              | 237      
|\n","|    time_elapsed     | 4178     |\n","|    total_timesteps  | 992869   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0365   |\n","|    n_updates        | 223217   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 649      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3796     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4183     |\n","|    total_timesteps  | 993914   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.00835  |\n","|    n_updates        | 223478   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.01e+03 |\n","|    ep_rew_mean      | 645      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3800     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4189     |\n","|    total_timesteps  | 995535   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0113   |\n","|    n_updates        | 223883   |\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.02e+03 |\n","|    ep_rew_mean      | 648      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3804     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4195     |\n","|    total_timesteps  | 996842   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0087   |\n","|    n_updates        | 224210   
|\n","----------------------------------\n","----------------------------------\n","| rollout/            |          |\n","|    ep_len_mean      | 4.03e+03 |\n","|    ep_rew_mean      | 653      |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    episodes         | 3808     |\n","|    fps              | 237      |\n","|    time_elapsed     | 4203     |\n","|    total_timesteps  | 999076   |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.0299   |\n","|    n_updates        | 224768   |\n","----------------------------------\n","Eval num_timesteps=1000000, episode_reward=493.00 +/- 74.07\n","Episode length: 3349.00 +/- 598.13\n","----------------------------------\n","| eval/               |          |\n","|    mean_ep_length   | 3.35e+03 |\n","|    mean_reward      | 493      |\n","| rollout/            |          |\n","|    exploration_rate | 0.01     |\n","| time/               |          |\n","|    total_timesteps  | 1000000  |\n","| train/              |          |\n","|    learning_rate    | 0.0001   |\n","|    loss             | 0.126    |\n","|    n_updates        | 224999   |\n","----------------------------------\n","Saving to logs//dqn/SpaceInvadersNoFrameskip-v4_1\n"]}],"source":["!python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4  -f logs/ -c dqn.yml"]},{"cell_type":"markdown","metadata":{"id":"SeChoX-3SZfP"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PuocgdokSab9"},"outputs":[],"source":["!python -m rl_zoo3.train --algo dqn  --env SpaceInvadersNoFrameskip-v4 -f logs/ -c dqn.yml"]},{"cell_type":"markdown","metadata":{"id":"_dLomIiMKQaf"},"source":["## Let's evaluate our agent 👀\n","- RL-Baselines3-Zoo provides `enjoy.py`, a python script to evaluate our agent. 
In most RL libraries, we call the evaluation script `enjoy.py`.\n","- Let's evaluate it for 5000 timesteps 🔥"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"co5um_KeKbBJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590557990,"user_tz":-60,"elapsed":26635,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"4b4834a0-9a3b-4ea0-dc15-ec928e85b790"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:08:54.641826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:08:55.861749: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","Atari Episode Score: 1315.00\n","Atari Episode Length 6701\n","Atari Episode Score: 415.00\n","Atari Episode Length 3195\n","Atari Episode Score: 520.00\n","Atari Episode Length 3261\n","Atari Episode Score: 600.00\n","Atari Episode Length 3773\n"]}],"source":["!python -m rl_zoo3.enjoy  --algo dqn --env SpaceInvadersNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/"]},{"cell_type":"markdown","metadata":{"id":"Q24K1tyWSj7t"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"P_uSmwGRSk0z"},"outputs":[],"source":["!python -m rl_zoo3.enjoy  --algo dqn  --env SpaceInvadersNoFrameskip-v4  --no-render  --n-timesteps 5000  --folder logs/"]},{"cell_type":"markdown","metadata":{"id":"liBeTltiHJtr"},"source":["## Publish our trained model on the Hub 🚀\n","Now that we saw we 
got good results after the training, we can publish our trained model on the hub 🤗 with one line of code.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit3/space-invaders-model.gif\" alt=\"Space Invaders model\">"]},{"cell_type":"markdown","metadata":{"id":"ezbHS1q3HYVV"},"source":["By using `rl_zoo3.push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉  https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"]},{"cell_type":"markdown","metadata":{"id":"XMSeZRBiHk6X"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">"]},{"cell_type":"markdown","metadata":{"id":"9O6FI0F8HnzE"},"source":["- Copy the token\n","- Run the cell below and paste the 
token"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"Ppu9yePwHrZX","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["8b26dd256d9046aabeb92f0d9f68e527","b63eda5abce34f8cad97d98a2290ad32","6be3c02ba56c4b5e8f378f01ed938646","f02cd1d54b2f4b039d787720a621a133","c30a027f58464d5c913fb68d8df9c0b6","3d9c6fe75541435998fcc7d634a7fa32","755efdbda0c843a397a544599fd1b3ae","1018363e5bfc4b8bb29257ad54066450","5c0c048031fe42bf90f27a7a5a67d475","ad3159ed419f4ce2bf40254b6cf09d57","ffa44fd8b43548f39fb3ea1e03a40616","ec5b669d8555490498c62d8a883ba7c2","27f4a1c6884542349b2da60f265024c0","7889c55f3174477bb3872459b0eac46c","eb8ba9da4bbc4b17b92fb85e57cb51bf","d6b0ac2d5f234a489d5a51a7ab6b46e1","000c81960541450e9d3c11dbb2a5e458","f7455c63e55b46a29b8359770d37e9eb","7b50fca49ac743a28bf376fa924a9843","40ac7b3245134b4dbc3070c3ac838d53","bbd19e48cbcd4eeaaaf38d8b870161e6","9732fa3d953c49a3ab36f78bee7657f9","22c51cc5261643bbb17901aa5ed74821","60ba997532ab4370a40da3ef4e6a27b2","465d5a47a4d140df89456520d98af3ac","7273ac76d5b042febcfa62905d9c19aa","e828447348a5436c897b583680db3c25","ffde39d25d034a3ea2ac9e13cbd7dc99","4017f6fe2976446bbb2b2a531f513a31","25c8eeeef3e04858b9c9408ec091fd07","e88faaa6609e42d4892556f4f70c9d7f","65b0b42963bf459b8692456d366b6425"]},"executionInfo":{"status":"ok","timestamp":1696590569406,"user_tz":-60,"elapsed":514,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"cae82ef9-2722-4aad-f4b2-07a0d9f6d792"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8b26dd256d9046aabeb92f0d9f68e527"}},"metadata":{}}],"source":["from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n","notebook_login()\n","!git config --global 
credential.helper store"]},{"cell_type":"markdown","metadata":{"id":"2RVEdunPHs8B"},"source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"]},{"cell_type":"markdown","metadata":{"id":"dSLwdmvhHvjw"},"source":["3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥"]},{"cell_type":"markdown","metadata":{"id":"PW436XnhHw1H"},"source":["Let's run push_to_hub.py file to upload our trained agent to the Hub.\n","\n","`--repo-name `: The name of the repo\n","\n","`-orga`: Your Hugging Face username\n","\n","`-f`: Where the trained model folder is (in our case `logs`)\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/unit3/select-id.png\" alt=\"Select Id\">"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"Ygk2sEktTDEw","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590680481,"user_tz":-60,"elapsed":62163,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"98aff527-8b0a-44aa-c0ae-989403af5e0d"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:10:21.746519: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:10:22.702495: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","Wrapping the env in a VecTransposeImage.\n","Uploading to jake-walker/dqn-SpaceInvadersNoFrameskip-v4, make sure to 
have the rights\n","\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n","create a model card and push everything to the hub. It might take up to some\n","minutes if video generation is activated. This is a work in progress: if you\n","encounter a bug, please open an issue.\u001b[0m\n","Cloning https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4 into local empty directory.\n","WARNING:huggingface_hub.repository:Cloning https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4 into local empty directory.\n","Saving model to: hub/dqn-SpaceInvadersNoFrameskip-v4/dqn-SpaceInvadersNoFrameskip-v4\n","/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n","  logger.warn(\n","Saving video to /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","Moviepy - Building video /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4.\n","Moviepy - Writing video /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","\n","Moviepy - Done !\n","Moviepy - video ready /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\n","  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\n","  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus 
--enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\n","  libavutil      56. 70.100 / 56. 70.100\n","  libavcodec     58.134.100 / 58.134.100\n","  libavformat    58. 76.100 / 58. 76.100\n","  libavdevice    58. 13.100 / 58. 13.100\n","  libavfilter     7.110.100 /  7.110.100\n","  libswscale      5.  9.100 /  5.  9.100\n","  libswresample   3.  9.100 /  3.  9.100\n","  libpostproc    55.  9.100 / 55.  9.100\n","Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4':\n","  Metadata:\n","    major_brand     : isom\n","    minor_version   : 512\n","    compatible_brands: isomiso2avc1mp41\n","    encoder         : Lavf58.29.100\n","  Duration: 00:00:33.40, start: 0.000000, bitrate: 54 kb/s\n","  Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 160x210, 52 kb/s, 30 fps, 30 tbr, 15360 tbn, 60 tbc (default)\n","    Metadata:\n","      handler_name    : VideoHandler\n","      vendor_id       : [0][0][0][0]\n","Stream mapping:\n","  Stream #0:0 -> #0:0 (h264 (native) -> h264 (libx264))\n","Press [q] to stop, [?] 
for help\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mprofile High, level 1.2, 4:2:0, 8-bit\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0m264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=3 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n","Output #0, mp4, to 'hub/dqn-SpaceInvadersNoFrameskip-v4/replay.mp4':\n","  Metadata:\n","    major_brand     : isom\n","    minor_version   : 512\n","    compatible_brands: isomiso2avc1mp41\n","    encoder         : Lavf58.76.100\n","  Stream #0:0(und): Video: h264 (avc1 / 0x31637661), yuv420p(progressive), 160x210, q=2-31, 30 fps, 15360 tbn (default)\n","    Metadata:\n","      handler_name    : VideoHandler\n","      vendor_id       : [0][0][0][0]\n","      encoder         : Lavc58.134.100 libx264\n","    Side data:\n","      cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A\n","frame= 1002 fps=0.0 q=-1.0 Lsize=     216kB time=00:00:33.30 bitrate=  53.3kbits/s speed=36.2x    \n","video:206kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 5.143969%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe I:5     Avg QP:18.04  size:  2726\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe P:524   Avg QP:22.26  size:   329\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe B:473   Avg 
QP:29.62  size:    51\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mconsecutive B-frames: 31.6% 13.4%  8.7% 46.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb I  I16..4: 20.4% 40.3% 39.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb P  I16..4:  0.6%  1.0%  0.9%  P16..4:  6.9%  2.7%  1.5%  0.0%  0.0%    skip:86.4%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb B  I16..4:  0.2%  0.1%  0.1%  B16..8: 10.1%  0.8%  0.1%  direct: 0.2%  skip:88.5%  L0:46.0% L1:53.5% BI: 0.5%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0m8x8 transform intra:40.1% inter:5.6%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mcoded y,uvDC,uvAC intra: 21.6% 41.0% 37.4% inter: 1.4% 1.8% 1.5%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi16 v,h,dc,p: 43% 50%  7%  0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 26%  8% 65%  1%  0%  0%  0%  0%  0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 32% 11% 45%  2%  2%  3%  2%  2%  1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi8c dc,h,v,p: 54% 31% 14%  1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mWeighted P-Frames: Y:0.0% UV:0.0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref P L0: 75.2%  4.7% 11.2%  8.8%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref B L0: 82.8% 13.9%  3.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref B L1: 95.9%  4.1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mkb/s:50.33\n","\u001b[38;5;4mℹ Pushing repo dqn-SpaceInvadersNoFrameskip-v4 to the Hugging Face\n","Hub\u001b[0m\n","Upload file dqn-SpaceInvadersNoFrameskip-v4.zip:   0% 1.00/26.0M [00:00<?, ?B/s]\n","Upload file train_eval_metrics.zip:   0% 1.00/35.5k [00:00<?, ?B/s]\u001b[A\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.pth:   0% 1.00/12.9M [00:00<?, ?B/s]\u001b[A\u001b[A\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth:   0% 1.00/431 [00:00<?, 
?B/s]\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","Upload file replay.mp4:   0% 1.00/216k [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth:   0% 1.00/12.9M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth: 15.3MB [00:02, 7.99MB/s]     \u001b[A\u001b[A\u001b[A\n","Upload file train_eval_metrics.zip: 15.3MB [00:02, 7.99MB/s]       \u001b[A\n","\n","\n","\n","Upload file replay.mp4: 15.3MB [00:03, 5.33MB/s]      \u001b[A\u001b[A\u001b[A\u001b[A\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.pth: 15.3MB [00:04, 4.00MB/s]       \u001b[A\u001b[A\n","\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4.zip: 30.5MB [00:05, 7.15MB/s]                To https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4\n","   1e04436..a73d6d8  main -> main\n","\n","WARNING:huggingface_hub.repository:To https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4\n","   1e04436..a73d6d8  main -> main\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4.zip: 100% 26.0M/26.0M [00:07<00:00, 3.88MB/s]\n","\n","Upload file train_eval_metrics.zip: 100% 35.5k/35.5k [00:07<00:00, 5.19kB/s]\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.pth: 100% 12.9M/12.9M [00:07<00:00, 1.93MB/s]\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth: 100% 431/431 [00:07<00:00, 61.4B/s] \n","\n","\n","\n","\n","Upload file replay.mp4: 100% 216k/216k [00:07<00:00, 31.6kB/s]\n","\n","\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth: 100% 12.9M/12.9M [00:07<00:00, 1.93MB/s]\n","\u001b[38;5;4mℹ Your model is pushed to the hub. 
You can view your model here:\n","https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4\u001b[0m\n"]}],"source":["!python -m rl_zoo3.push_to_hub  --algo dqn  --env SpaceInvadersNoFrameskip-v4  --repo-name dqn-SpaceInvadersNoFrameskip-v4 -orga jake-walker -f logs/"]},{"cell_type":"markdown","metadata":{"id":"otgpa0rhS9wR"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_HQNlAXuEhci"},"outputs":[],"source":["!python -m rl_zoo3.push_to_hub  --algo dqn  --env SpaceInvadersNoFrameskip-v4  --repo-name dqn-SpaceInvadersNoFrameskip-v4  -orga ThomasSimonini  -f logs/"]},{"cell_type":"markdown","metadata":{"id":"0D4F5zsTTJ-L"},"source":["###."]},{"cell_type":"markdown","metadata":{"id":"ff89kd2HL1_s"},"source":["Congrats 🥳 you've just trained and uploaded your first Deep Q-Learning agent using RL-Baselines-3 Zoo. The script above should have displayed a link to a model repository such as https://huggingface.co/ThomasSimonini/dqn-SpaceInvadersNoFrameskip-v4. 
When you go to this link, you can:\n","\n","- See a **video preview of your agent** at the right.\n","- Click \"Files and versions\" to see all the files in the repository.\n","- Click \"Use in stable-baselines3\" to get a code snippet that shows how to load the model.\n","- A model card (`README.md` file) which gives a description of the model and the hyperparameters you used.\n","\n","Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent.\n","\n","**Compare the results of your agents with your classmates** using the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) 🏆"]},{"cell_type":"markdown","metadata":{"id":"fyRKcCYY-dIo"},"source":["## Load a powerful trained model 🔥\n","- The Stable-Baselines3 team uploaded **more than 150 trained Deep Reinforcement Learning agents on the Hub**.\n","\n","You can find them here: 👉 https://huggingface.co/sb3\n","\n","Some examples:\n","- Asteroids: https://huggingface.co/sb3/dqn-AsteroidsNoFrameskip-v4\n","- Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4\n","- Breakout: https://huggingface.co/sb3/dqn-BreakoutNoFrameskip-v4\n","- Road Runner: https://huggingface.co/sb3/dqn-RoadRunnerNoFrameskip-v4\n","\n","Let's load an agent playing Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"B-9QVFIROI5Y","colab":{"base_uri":"https://localhost:8080/","height":231},"executionInfo":{"status":"ok","timestamp":1696590746205,"user_tz":-60,"elapsed":1039,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"cb55980e-9e1e-4616-c5d0-747fa1dc26fe"},"outputs":[{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["<video controls autoplay><source 
src=\"https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>\n"]},"metadata":{}}],"source":["%%html\n","<video controls autoplay><source src=\"https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4/resolve/main/replay.mp4\" type=\"video/mp4\"></video>"]},{"cell_type":"markdown","metadata":{"id":"7ZQNY_r6NJtC"},"source":["1. We download the model using `rl_zoo3.load_from_hub`, and place it in a new folder that we can call `rl_trained`"]},{"cell_type":"code","execution_count":15,"metadata":{"id":"OdBNZHy0NGTR","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590822816,"user_tz":-60,"elapsed":17577,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"43d0ceec-f667-4a84-dc19-2f33112077b0"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:13:28.520597: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:13:29.515283: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Downloading from https://huggingface.co/sb3/dqn-BreakoutNoFrameskip-v4\n","Downloading (…)utNoFrameskip-v4.zip: 100% 27.2M/27.2M [00:03<00:00, 8.26MB/s]\n","Downloading (…)olve/main/config.yml: 100% 548/548 [00:00<00:00, 2.98MB/s]\n","No normalization file\n","Downloading (…)esolve/main/args.yml: 100% 886/886 [00:00<00:00, 4.29MB/s]\n","Downloading (…)/main/env_kwargs.yml: 100% 3.00/3.00 [00:00<00:00, 12.9kB/s]\n","Downloading train_eval_metrics.zip: 100% 346k/346k [00:00<00:00, 369kB/s]\n","Saving to rl_trained/dqn/BreakoutNoFrameskip-v4_1\n"]}],"source":["# Download model and save it into the rl_trained/ folder\n","!python -m rl_zoo3.load_from_hub --algo dqn --env 
BreakoutNoFrameskip-v4 -orga sb3 -f rl_trained/"]},{"cell_type":"markdown","metadata":{"id":"LFt6hmWsNdBo"},"source":["2. Let's evaluate it for 5000 timesteps"]},{"cell_type":"code","execution_count":16,"metadata":{"id":"aOxs0rNuN0uS","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590862001,"user_tz":-60,"elapsed":26104,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"fe78ee25-82e2-49bd-8cb8-57f271d1f61b"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:13:57.837179: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:13:58.862053: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading rl_trained/dqn/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/save_util.py:166: UserWarning: Could not deserialize object exploration_schedule. Consider using `custom_objects` argument to replace this object.\n","Exception: 'bytes' object cannot be interpreted as an integer\n","  warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/vec_env/patch_gym.py:95: UserWarning: You loaded a model that was trained using OpenAI Gym. 
We strongly recommend transitioning to Gymnasium by saving that model again.\n","  warnings.warn(\n","Atari Episode Score: 386.00\n","Atari Episode Length 7878\n","Atari Episode Score: 374.00\n","Atari Episode Length 9586\n"]}],"source":["!python -m rl_zoo3.enjoy --algo dqn --env BreakoutNoFrameskip-v4 -n 5000  -f rl_trained/ --no-render"]},{"cell_type":"markdown","metadata":{"id":"kxMDuDfPON57"},"source":["Why not try to train your own **Deep Q-Learning Agent playing BeamRiderNoFrameskip-v4? 🏆.**\n","\n","If you want to try, check https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4#hyperparameters **in the model card, you have the hyperparameters of the trained agent.**"]},{"cell_type":"markdown","metadata":{"id":"xL_ZtUgpOuY6"},"source":["But finding hyperparameters can be a daunting task. Fortunately, we'll see in the next Unit, how we can **use Optuna for optimizing the Hyperparameters 🔥.**\n"]},{"cell_type":"markdown","metadata":{"id":"-pqaco8W-huW"},"source":["## Some additional challenges 🏆\n","The best way to learn **is to try things on your own**!\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. 
Can you get to the top?\n","\n","Here's a list of environments you can try to train your agent with:\n","- BeamRiderNoFrameskip-v4\n","- BreakoutNoFrameskip-v4\n","- EnduroNoFrameskip-v4\n","- PongNoFrameskip-v4\n","\n","Also, **if you want to learn to implement Deep Q-Learning by yourself**, you definitely should look at the CleanRL implementation: https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/dqn_atari.py\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit4/atari-envs.gif\" alt=\"Environments\"/>"]},{"cell_type":"markdown","metadata":{"id":"paS-XKo4-kmu"},"source":["________________________________________________________________________\n","Congrats on finishing this chapter!\n","\n","If you still feel confused with all these elements...it's totally normal! **This was the same for me and for all people who studied RL.**\n","\n","Take time to really **grasp the material before continuing and try the additional challenges**. It’s important to master these elements and have a solid foundation.\n","\n","In the next unit, **we’re going to learn about [Optuna](https://optuna.org/)**. One of the most critical tasks in Deep Reinforcement Learning is to find a good set of training hyperparameters. And Optuna is a library that helps you to automate the search.\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"5WRx7tO7-mvC"},"source":["\n","\n","### This is a course built with you 👷🏿‍♀️\n","\n","Finally, we want to improve and update the course iteratively with your feedback. If you have some, please fill out this form 👉 https://forms.gle/3HgA7bEHwAmmLfwh9\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."]},{"cell_type":"markdown","source":["See you on Bonus unit 2! 
🔥"],"metadata":{"id":"Kc3udPT-RcXc"}},{"cell_type":"markdown","metadata":{"id":"fS3Xerx0fIMV"},"source":["### Keep Learning, Stay Awesome 🤗"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit3/unit3.ipynb","timestamp":1696584471906}],"gpuType":"T4","collapsed_sections":["SeChoX-3SZfP","Q24K1tyWSj7t","otgpa0rhS9wR"]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.6"},"varInspector":{"cols":{"lenName":16,"lenType":16,"lenVar":40},"kernels_config":{"python":{"delete_cmd_postfix":"","delete_cmd_prefix":"del ","library":"var_list.py","varRefreshCmd":"print(var_dic_list())"},"r":{"delete_cmd_postfix":") ","delete_cmd_prefix":"rm(","library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) "}},"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"],"window_display":false},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"8b26dd256d9046aabeb92f0d9f68e527":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_bbd19e48cbcd4eeaaaf38d8b870161e6","IPY_MODEL_9732fa3d953c49a3ab36f78bee7657f9","IPY_MODEL_22c51cc5261643bbb17901aa5ed74821","IPY_MODEL_60ba997532ab4370a40da3ef4e6a27b2"],"layout":"IPY_MODEL_755efdbda0c843a397a544599fd1b3ae"}},"b63eda5abce34f8cad97d98a2290ad32":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@
jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1018363e5bfc4b8bb29257ad54066450","placeholder":"​","style":"IPY_MODEL_5c0c048031fe42bf90f27a7a5a67d475","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"}},"6be3c02ba56c4b5e8f378f01ed938646":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_ad3159ed419f4ce2bf40254b6cf09d57","placeholder":"​","style":"IPY_MODEL_ffa44fd8b43548f39fb3ea1e03a40616","value":""}},"f02cd1d54b2f4b039d787720a621a133":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_ec5b669d8555490498c62d8a883ba7c2","style":"IPY_MODEL_27f4a1c6884542349b2da60f265024c0","value":true}},"c30a027f58464d5c913fb68d8df9c0b6":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_7889c55f3174477bb3872459b0eac46c","style":"IPY_MODEL_eb8ba9da4bbc4b17b92fb85e57cb51bf","tooltip":""}},"3d9c6fe75541435998fcc7d634a7fa32":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d6b0ac2d5f234a489d5a51a7ab6b46e1","placeholder":"​","style":"IPY_MODEL_000c81960541450e9d3c11dbb2a5e458","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"755efdbda0c843a397a544599fd1b3ae":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"1018363e5bfc4b8bb29257ad54066450":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5c0c048031fe42bf90f27a7a5a67d475":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ad3159ed419f4ce2bf40254b6cf09d57":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffa44fd8b43548f39fb3ea1e03a40616":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ec5b669d8555490498c62d8a883ba7c2":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"27f4a1c6884542349b2da60f265024c0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7889c55f3174477bb3872459b0eac46c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb8ba9da4bbc4b17b92fb85e57cb51bf":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"d6b0ac2d5f234a489d5a51a7ab6b46e1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"000c81960541450e9d3c11dbb2a5e458":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7455c63e55b46a29b8359770d37e9eb":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7b50fca49ac743a28bf376fa924a9843","placeholder":"​","style":"IPY_MODEL_40ac7b3245134b4dbc3070c3ac838d53","value":"Connecting..."}},"7b50fca49ac743a28bf376fa924a9843":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"40ac7b3245134b4dbc3070c3ac838d53":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bbd19e48cbcd4eeaaaf38d8b870161e6":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_465d5a47a4d140df89456520d98af3ac","placeholder":"​","style":"IPY_MODEL_7273ac76d5b042febcfa62905d9c19aa","value":"Token is valid (permission: write)."}},"9732fa3d953c49a3ab36f78bee7657f9":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e828447348a5436c897b583680db3c25","placeholder":"​","style":"IPY_MODEL_ffde39d25d034a3ea2ac9e13cbd7dc99","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"22c51cc5261643bbb17901aa5ed74821":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4017f6fe2976446bbb2b2a531f513a31","placeholder":"​","style":"IPY_MODEL_25c8eeeef3e04858b9c9408ec091fd07","value":"Your token has been saved to /root/.cache/huggingface/token"}},"60ba997532ab4370a40da3ef4e6a27b2":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e88faaa6609e42d4892556f4f70c9d7f","placeholder":"​","style":"IPY_MODEL_65b0b42963bf459b8692456d366b6425","value":"Login 
successful"}},"465d5a47a4d140df89456520d98af3ac":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7273ac76d5b042febcfa62905d9c19aa":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e828447348a5436c897b583680db3c25":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffde39d25d034a3ea2ac9e13cbd7dc99":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4017f6fe2976446bbb2b2a531f513a31":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25c8eeeef3e04858b9c9408ec091fd07":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e88faaa6609e42d4892556f4f70c9d7f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"65b0b42963bf459b8692456d366b6425":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
diff --git a/HF DeepRL Course/Unit4 - Policy Gradient.ipynb b/HF DeepRL Course/Unit4 - Policy Gradient.ipynb
new file mode 100644
index 0000000..d0bb970
--- /dev/null
+++ b/HF DeepRL Course/Unit4 - Policy Gradient.ipynb	
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"CjRWziAVU2lZ"},"source":["# Unit 4: Code your first Deep Reinforcement Learning Algorithm with PyTorch: Reinforce. And test its robustness 💪\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit6/thumbnail.png\" alt=\"thumbnail\"/>\n","\n","\n","In this notebook, you'll code your first Deep Reinforcement Learning algorithm from scratch: Reinforce (also called Monte Carlo Policy Gradient).\n","\n","Reinforce is a *Policy-based method*: a Deep Reinforcement Learning algorithm that tries **to optimize the policy directly without using an action-value function**.\n","\n","More precisely, Reinforce is a *Policy-gradient method*, a subclass of *Policy-based methods* that aims **to optimize the policy directly by estimating the weights of the optimal policy using gradient ascent**.\n","\n","To test its robustness, we're going to train it in 2 different simple environments:\n","- Cartpole-v1\n","- PixelcopterEnv\n","\n","⬇️ Here is an example of what **you will achieve at the end of this notebook.** ⬇️"]},{"cell_type":"markdown","source":["  <img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit6/envs.gif\" alt=\"Environments\"/>\n"],"metadata":{"id":"s4rBom2sbo7S"}},{"cell_type":"markdown","source":["### 🎮 Environments:\n","\n","- [CartPole-v1](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)\n","- [PixelCopter](https://pygame-learning-environment.readthedocs.io/en/latest/user/games/pixelcopter.html)\n","\n","### 📚 RL-Library:\n","\n","- Python\n","- PyTorch\n","\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"BPLwsPajb1f8"}},{"cell_type":"markdown","metadata":{"id":"L_WSo0VUV99t"},"source":["## Objectives of this 
notebook 🏆\n","At the end of the notebook, you will:\n","- Be able to **code from scratch a Reinforce algorithm using PyTorch.**\n","- Be able to **test the robustness of your agent using simple environments.**\n","- Be able to **push your trained agent to the Hub** with a nice video replay and an evaluation score 🔥."]},{"cell_type":"markdown","metadata":{"id":"lEPrZg2eWa4R"},"source":["## This notebook is from the Deep Reinforcement Learning Course\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"]},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"mjY-eq3eWh9O"},"source":["## Prerequisites 🏗️\n","Before diving into the notebook, you need to:\n","\n","🔲 📚 [Study Policy Gradients by reading Unit 4](https://huggingface.co/deep-rl-course/unit4/introduction)"]},{"cell_type":"markdown","source":["# Let's code Reinforce algorithm from scratch 🔥\n","\n","\n","To validate this hands-on for the certification process, you need to push your trained models to the Hub.\n","\n","- Get a result of >= 350 for 
`Cartpole-v1`.\n","- Get a result of >= 5 for `PixelCopter`.\n","\n","To find your result, go to the leaderboard and find your model, **the result = mean_reward - std of reward**. **If you don't see your model on the leaderboard, go to the bottom of the leaderboard page and click on the refresh button**.\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process\n"],"metadata":{"id":"Bsh4ZAamchSl"}},{"cell_type":"markdown","source":["## A piece of advice 💡\n","It's better to run this colab in a copy on your Google Drive, so that **if it times out** you still have the saved notebook on your Google Drive and do not need to redo everything from scratch.\n","\n","To do that you can either do `Ctrl + S` or `File > Save a copy in Google Drive.`"],"metadata":{"id":"JoTC9o2SczNn"}},{"cell_type":"markdown","source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"],"metadata":{"id":"PU4FVzaoM6fC"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"],"metadata":{"id":"KV0NyFdQM9ZG"}},{"cell_type":"markdown","source":["## Create a virtual display 🖥\n","\n","During the notebook, we'll need to generate a replay video. 
To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the librairies and create and run a virtual screen 🖥"],"metadata":{"id":"bTpYcVZVMzUI"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"jV6wjQ7Be7p5","executionInfo":{"status":"ok","timestamp":1697189479738,"user_tz":-60,"elapsed":26147,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip install pyvirtualdisplay\n","!pip install pyglet==1.5.1"]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"Sr-Nuyb1dBm0","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189480454,"user_tz":-60,"elapsed":720,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0824fafa-e5c9-4760-807c-22ac6f4a90dd"},"execution_count":2,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<pyvirtualdisplay.display.Display at 0x7d9b241af9a0>"]},"metadata":{},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"tjrLfPFIW8XK"},"source":["## Install the dependencies 🔽\n","The first step is to install the dependencies. We’ll install multiple ones:\n","\n","- `gym`\n","- `gym-games`: Extra gym environments made with PyGame.\n","- `huggingface_hub`: 🤗 works as a central place where anyone can share and explore models and datasets. It has versioning, metrics, visualizations, and other features that will allow you to easily collaborate with others.\n","\n","You may be wondering why we install gym and not gymnasium, a more recent version of gym? 
**Because the gym-games we are using are not updated yet with gymnasium**.\n","\n","The differences you'll encounter here:\n","- In `gym` we don't have `terminated` and `truncated` but only `done`.\n","- In `gym` using `env.step()` returns `state, reward, done, info`\n","\n","You can learn more about the differences between Gym and Gymnasium here 👉 https://gymnasium.farama.org/content/migration-guide/\n","\n","\n","You can see here all the Reinforce models available 👉 https://huggingface.co/models?other=reinforce\n","\n","And you can find all the Deep Reinforcement Learning models here 👉 https://huggingface.co/models?pipeline_tag=reinforcement-learning\n"]},{"cell_type":"code","source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt"],"metadata":{"id":"e8ZVi-uydpgL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189499517,"user_tz":-60,"elapsed":12635,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"4631edda-8e5a-4a6c-bff0-d486e8270076"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting git+https://github.com/ntasfi/PyGame-Learning-Environment.git (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1))\n","  Cloning https://github.com/ntasfi/PyGame-Learning-Environment.git to /tmp/pip-req-build-pbury65s\n","  Running command git clone --filter=blob:none --quiet https://github.com/ntasfi/PyGame-Learning-Environment.git /tmp/pip-req-build-pbury65s\n","  Resolved https://github.com/ntasfi/PyGame-Learning-Environment.git to commit 3dbe79dc0c35559bb441b9359948aabf9bb3d331\n","  Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting git+https://github.com/simoninithomas/gym-games (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2))\n","  Cloning https://github.com/simoninithomas/gym-games to /tmp/pip-req-build-cjb_39h_\n","  Running command git clone --filter=blob:none --quiet https://github.com/simoninithomas/gym-games /tmp/pip-req-build-cjb_39h_\n","  Resolved https://github.com/simoninithomas/gym-games to commit f31695e4ba028400628dc054ee8a436f28193f0b\n","  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting huggingface_hub (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3))\n","  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: imageio-ffmpeg in /usr/local/lib/python3.10/dist-packages (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 4)) (0.4.9)\n","Collecting pyyaml==6.0 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 5))\n","  Downloading PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m682.2/682.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ple==0.0.1->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1)) (1.23.5)\n","Requirement already satisfied: Pillow in 
/usr/local/lib/python3.10/dist-packages (from ple==0.0.1->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1)) (9.4.0)\n","Requirement already satisfied: gym>=0.13.0 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (0.25.2)\n","Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (67.7.2)\n","Requirement already satisfied: pygame>=1.9.6 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (2.5.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (4.66.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r 
https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (23.2)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gym>=0.13.0->gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (2.2.1)\n","Requirement already satisfied: gym-notices>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from gym>=0.13.0->gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (0.0.8)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2023.7.22)\n","Building wheels for collected packages: ple, gym-games\n","  Building wheel for ple 
(setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for ple: filename=ple-0.0.1-py3-none-any.whl size=50770 sha256=d084c36176f5d14eac837596716029b54f4c9e9fd2802b435fa1b61b6d2a1405\n","  Stored in directory: /tmp/pip-ephem-wheel-cache-g1e8y7l_/wheels/f8/31/ca/a64a7ce73540465412d82813780d062db53b90e3f42a4ecb7f\n","  Building wheel for gym-games (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for gym-games: filename=gym_games-1.0.4-py3-none-any.whl size=17306 sha256=fd314c50edfecfb70d2cab842c3149422f794de3d81be5548d42ef762b6f352d\n","  Stored in directory: /tmp/pip-ephem-wheel-cache-g1e8y7l_/wheels/ca/bf/6b/7d631626202ebb033c908a688d1862ff4d948c34cf621d7dc9\n","Successfully built ple gym-games\n","Installing collected packages: pyyaml, ple, huggingface_hub, gym-games\n","  Attempting uninstall: pyyaml\n","    Found existing installation: PyYAML 6.0.1\n","    Uninstalling PyYAML-6.0.1:\n","      Successfully uninstalled PyYAML-6.0.1\n","Successfully installed gym-games-1.0.4 huggingface_hub-0.18.0 ple-0.0.1 pyyaml-6.0\n"]}]},{"cell_type":"markdown","metadata":{"id":"AAHAq6RZW3rn"},"source":["## Import the packages 📦\n","In addition to import the installed libraries, we also import:\n","\n","- `imageio`: A library that will help us to generate a replay video\n","\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"V8oadoJSWp7C","executionInfo":{"status":"ok","timestamp":1697189513399,"user_tz":-60,"elapsed":7285,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["import numpy as np\n","\n","from collections import deque\n","\n","import matplotlib.pyplot as plt\n","%matplotlib inline\n","\n","# PyTorch\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","import torch.optim as optim\n","from torch.distributions import Categorical\n","\n","# Gym\n","import gym\n","import gym_pygame\n","\n","# Hugging Face Hub\n","from huggingface_hub import notebook_login # To log to 
our Hugging Face account to be able to upload models to the Hub.\n","import imageio"]},{"cell_type":"markdown","source":["## Check if we have a GPU\n","\n","- Let's check if we have a GPU\n","- If that's the case, you should see `cuda:0`"],"metadata":{"id":"RfxJYdMeeVgv"}},{"cell_type":"code","execution_count":5,"metadata":{"id":"kaJu5FeZxXGY","executionInfo":{"status":"ok","timestamp":1697189516797,"user_tz":-60,"elapsed":238,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"U5TNYa14aRav","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189519400,"user_tz":-60,"elapsed":208,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"a0ed2a82-0d96-4208-ff3a-c9aa2940a735"},"outputs":[{"output_type":"stream","name":"stdout","text":["cuda:0\n"]}],"source":["print(device)"]},{"cell_type":"markdown","metadata":{"id":"PBPecCtBL_pZ"},"source":["We're now ready to implement our Reinforce algorithm 🔥"]},{"cell_type":"markdown","metadata":{"id":"8KEyKYo2ZSC-"},"source":["# First agent: Playing CartPole-v1 🤖"]},{"cell_type":"markdown","metadata":{"id":"haLArKURMyuF"},"source":["## Create the CartPole environment and understand how it works\n","### [The environment 🎮](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)\n"]},{"cell_type":"markdown","metadata":{"id":"AH_TaLKFXo_8"},"source":["### Why do we use a simple environment like CartPole-v1?\n","As explained in [Reinforcement Learning Tips and Tricks](https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html), when you implement your agent from scratch you need **to be sure that it works correctly and find bugs with easy environments before going deeper**. 
Since finding bugs will be much easier in simple environments.\n","\n","\n","> Try to have some “sign of life” on toy problems\n","\n","\n","> Validate the implementation by making it run on harder and harder envs (you can compare results against the RL zoo). You usually need to run hyperparameter optimization for that step.\n","___\n","### The CartPole-v1 environment\n","\n","> A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left and right direction on the cart.\n","\n","\n","\n","So, we start with CartPole-v1. The goal is to push the cart left or right **so that the pole stays in the equilibrium.**\n","\n","The episode ends if:\n","- The pole Angle is greater than ±12°\n","- Cart Position is greater than ±2.4\n","- Episode length is greater than 500\n","\n","We get a reward 💰 of +1 every timestep the Pole stays in the equilibrium."]},{"cell_type":"code","execution_count":141,"metadata":{"id":"POOOk15_K6KA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194221950,"user_tz":-60,"elapsed":219,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"779b7b4e-5a21-430f-c7d4-622eed7d1451"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/gym/core.py:317: DeprecationWarning: \u001b[33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.\u001b[0m\n","  deprecation(\n","/usr/local/lib/python3.10/dist-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: \u001b[33mWARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. 
This will be the default behaviour in future.\u001b[0m\n","  deprecation(\n"]}],"source":["env_id = \"CartPole-v1\"\n","# Create the env\n","env = gym.make(env_id)\n","\n","# Create the evaluation env\n","eval_env = gym.make(env_id)\n","\n","# Get the state space and action space\n","s_size = env.observation_space.shape[0]\n","a_size = env.action_space.n"]},{"cell_type":"code","execution_count":142,"metadata":{"id":"FMLFrjiBNLYJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194222821,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"bedcfa58-1a35-42e5-cb4b-e9f9c3016f1f"},"outputs":[{"output_type":"stream","name":"stdout","text":["_____OBSERVATION SPACE_____ \n","\n","The State Space is:  4\n","Sample observation [-2.7834890e+00 -4.5419460e+37 -7.4647829e-02 -1.0727393e+37]\n"]}],"source":["print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"The State Space is: \", s_size)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"]},{"cell_type":"code","execution_count":143,"metadata":{"id":"Lu6t4sRNNWkN","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194223482,"user_tz":-60,"elapsed":2,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ebd08bdc-e037-4cd9-ef4d-e46da1d370db"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n"," _____ACTION SPACE_____ \n","\n","The Action Space is:  2\n","Action Space Sample 1\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"The Action Space is: \", a_size)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"7SJMJj3WaFOz"},"source":["## Let's build the Reinforce Architecture\n","This implementation is based on two implementations:\n","- [PyTorch official Reinforcement Learning 
example](https://github.com/pytorch/examples/blob/main/reinforcement_learning/reinforce.py)\n","- [Udacity Reinforce](https://github.com/udacity/deep-reinforcement-learning/blob/master/reinforce/REINFORCE.ipynb)\n","- [Improvement of the integration by Chris1nexus](https://github.com/huggingface/deep-rl-class/pull/95)\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit6/reinforce.png\" alt=\"Reinforce\"/>"]},{"cell_type":"markdown","metadata":{"id":"49kogtxBODX8"},"source":["So we want:\n","- Two fully connected layers (fc1 and fc2).\n","- Using ReLU as activation function of fc1\n","- Using Softmax to output a probability distribution over actions"]},{"cell_type":"code","execution_count":144,"metadata":{"id":"w2LHcHhVZvPZ","executionInfo":{"status":"ok","timestamp":1697194225352,"user_tz":-60,"elapsed":211,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["class Policy(nn.Module):\n","    def __init__(self, s_size, a_size, h_size):\n","        super(Policy, self).__init__()\n","        # Create two fully connected layers\n","        self.fc1 = nn.Linear(s_size, h_size)\n","        self.fc2 = nn.Linear(h_size, a_size)\n","\n","    def forward(self, x):\n","        # Define the forward pass\n","        # state goes to fc1 then we apply ReLU activation function\n","        x = F.relu(self.fc1(x))\n","        # fc1 outputs goes to fc2\n","        x = self.fc2(x)\n","        # We output the softmax\n","        return F.softmax(x, dim=1)\n","\n","    def act(self, state):\n","        \"\"\"\n","        Given a state, take action\n","        \"\"\"\n","        state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n","        probs = self.forward(state).cpu()\n","        m = Categorical(probs)\n","        action = m.sample()\n","        return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"rOMrdwSYOWSC"},"source":["### 
Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jGdhRSVrOV4K"},"outputs":[],"source":["class Policy(nn.Module):\n","    def __init__(self, s_size, a_size, h_size):\n","        super(Policy, self).__init__()\n","        self.fc1 = nn.Linear(s_size, h_size)\n","        self.fc2 = nn.Linear(h_size, a_size)\n","\n","    def forward(self, x):\n","        x = F.relu(self.fc1(x))\n","        x = self.fc2(x)\n","        return F.softmax(x, dim=1)\n","\n","    def act(self, state):\n","        state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n","        probs = self.forward(state).cpu()\n","        m = Categorical(probs)\n","        action = np.argmax(m)\n","        return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"ZTGWL4g2eM5B"},"source":["I made a mistake, can you guess where?\n","\n","- To find out let's make a forward pass:"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lwnqGBCNePor"},"outputs":[],"source":["debug_policy = Policy(s_size, a_size, 64).to(device)\n","debug_policy.act(env.reset())"]},{"cell_type":"markdown","metadata":{"id":"14UYkoxCPaor"},"source":["- Here we see that the error says `ValueError: The value argument to log_prob must be a Tensor`\n","\n","- It means that `action` in `m.log_prob(action)` must be a Tensor **but it's not.**\n","\n","- Do you know why? Check the act function and try to see why it does not work.\n","\n","Advice 💡: Something is wrong in this implementation. 
Remember that in the act function **we want to sample an action from the probability distribution over actions**.\n"]},{"cell_type":"markdown","metadata":{"id":"gfGJNZBUP7Vn"},"source":["### (Real) Solution"]},{"cell_type":"code","execution_count":145,"metadata":{"id":"Ho_UHf49N9i4","executionInfo":{"status":"ok","timestamp":1697194227568,"user_tz":-60,"elapsed":234,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["class Policy(nn.Module):\n","    def __init__(self, s_size, a_size, h_size):\n","        super(Policy, self).__init__()\n","        self.fc1 = nn.Linear(s_size, h_size)\n","        self.fc2 = nn.Linear(h_size, a_size)\n","\n","    def forward(self, x):\n","        x = F.relu(self.fc1(x))\n","        x = self.fc2(x)\n","        return F.softmax(x, dim=1)\n","\n","    def act(self, state):\n","        state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n","        probs = self.forward(state).cpu()\n","        m = Categorical(probs)\n","        action = m.sample()\n","        return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"rgJWQFU_eUYw"},"source":["By using CartPole, it was easier to debug since **we know that the bug comes from our integration and not from our simple environment**."]},{"cell_type":"markdown","source":["- Since **we want to sample an action from the probability distribution over actions**, we can't use `action = np.argmax(m)` since it will always output the action that has the highest probability.\n","\n","- We need to replace it with `action = m.sample()`, which will sample an action from the probability distribution P(.|s)"],"metadata":{"id":"c-20i7Pk0l1T"}},{"cell_type":"markdown","metadata":{"id":"4MXoqetzfIoW"},"source":["### Let's build the Reinforce Training Algorithm\n","This is the Reinforce algorithm pseudocode:\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit6/pg_pseudocode.png\" 
alt=\"Policy gradient pseudocode\"/>\n","  "]},{"cell_type":"markdown","source":["- When we calculate the return Gt (line 6) we see that we calculate the sum of discounted rewards **starting at timestep t**.\n","\n","- Why? Because our policy should only **reinforce actions on the basis of the consequences**: so rewards obtained before taking an action are useless (since they were not caused by the action), **only the ones that come after the action matter**.\n","\n","- Before coding this you should read this section [don't let the past distract you](https://spinningup.openai.com/en/latest/spinningup/rl_intro3.html#don-t-let-the-past-distract-you) that explains why we use reward-to-go policy gradient.\n","\n","We use an interesting technique coded by [Chris1nexus](https://github.com/Chris1nexus) to **compute the return at each timestep efficiently**. The comments in the code explain the procedure. Also, don't hesitate [to check the PR explanation](https://github.com/huggingface/deep-rl-class/pull/95)\n","But overall the idea is to **compute the return at each timestep efficiently**."],"metadata":{"id":"QmcXG-9i2Qu2"}},{"cell_type":"markdown","metadata":{"id":"O554nUGPpcoq"},"source":["The second question you may ask is **why do we minimize the loss**? Didn't you talk about Gradient Ascent, not Gradient Descent?\n","\n","- We want to maximize our utility function $J(\\theta)$ but in PyTorch like in Tensorflow it's better to **minimize an objective function.**\n","    - So let's say we want to reinforce action 3 at a certain timestep. 
Before training this action P is 0.25.\n","    - So we want to modify $\\theta$ such that $\\pi_\\theta(a_3|s; \\theta) > 0.25$\n","    - Because all P must sum to 1, max $\\pi_\\theta(a_3|s; \\theta)$ will **minimize other action probability.**\n","    - So we should tell PyTorch **to min $1 - \\pi_\\theta(a_3|s; \\theta)$.**\n","    - This loss function approaches 0 as $\\pi_\\theta(a_3|s; \\theta)$ nears 1.\n","    - So we are encouraging the gradient to max $\\pi_\\theta(a_3|s; \\theta)$\n"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"iOdv8Q9NfLK7","executionInfo":{"status":"ok","timestamp":1697190619013,"user_tz":-60,"elapsed":400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def reinforce(policy, optimizer, n_training_episodes, max_t, gamma, print_every):\n","    # Help us to calculate the score during the training\n","    scores_deque = deque(maxlen=100)\n","    scores = []\n","    # Line 3 of pseudocode\n","    for i_episode in range(1, n_training_episodes+1):\n","        saved_log_probs = []\n","        rewards = []\n","        state = env.reset()\n","        # Line 4 of pseudocode\n","        for t in range(max_t):\n","            action, log_prob = policy.act(state)\n","            saved_log_probs.append(log_prob)\n","            state, reward, done, _ = env.step(action)\n","            rewards.append(reward)\n","            if done:\n","                break\n","        scores_deque.append(sum(rewards))\n","        scores.append(sum(rewards))\n","\n","        # Line 6 of pseudocode: calculate the return\n","        returns = deque(maxlen=max_t)\n","        n_steps = len(rewards)\n","        # Compute the discounted returns at each timestep,\n","        # as the sum of the gamma-discounted return at time t (G_t) + the reward at time t\n","\n","        # In O(N) time, where N is the number of time steps\n","        # (this definition of the discounted return G_t follows the definition of 
this quantity\n","        # shown at page 44 of Sutton&Barto 2017 2nd draft)\n","        # G_t = r_(t+1) + r_(t+2) + ...\n","\n","        # Given this formulation, the returns at each timestep t can be computed\n","        # by re-using the computed future returns G_(t+1) to compute the current return G_t\n","        # G_t = r_(t+1) + gamma*G_(t+1)\n","        # G_(t-1) = r_t + gamma* G_t\n","        # (this follows a dynamic programming approach, in which we memoize solutions in order\n","        # to avoid computing them multiple times)\n","\n","        # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)\n","        # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...\n","\n","\n","        ## Given the above, we calculate the return at timestep t as:\n","        #               gamma * returns[t+1] + rewards[t]\n","        #\n","        ## We compute this starting from the last timestep to the first, in order\n","        ## to employ the formula presented above and avoid redundant computations that would be needed\n","        ## if we were to do it from first to last.\n","\n","        ## Hence, the queue \"returns\" will hold the returns in chronological order, from t=0 to t=n_steps-1,\n","        ## thanks to the appendleft() function, which inserts at position 0 in constant time O(1);\n","        ## a normal python list would instead require O(N) to do this.\n","        for t in range(n_steps)[::-1]:\n","            disc_return_t = (returns[0] if len(returns)>0 else 0)\n","            returns.appendleft(gamma * disc_return_t + rewards[t]) # TODO: complete here\n","\n","        ## standardization of the returns is employed to make training more stable\n","        eps = np.finfo(np.float32).eps.item()\n","\n","        ## eps is the float32 machine epsilon (the gap between 1.0 and the next representable value), which is\n","        # added to the standard deviation of the returns to avoid numerical instabilities (e.g. division by zero)\n","        returns = 
torch.tensor(returns)\n","        returns = (returns - returns.mean()) / (returns.std() + eps)\n","\n","        # Line 7:\n","        policy_loss = []\n","        for log_prob, disc_return in zip(saved_log_probs, returns):\n","            policy_loss.append(-log_prob * disc_return)\n","        policy_loss = torch.cat(policy_loss).sum()\n","\n","        # Line 8: PyTorch prefers gradient descent\n","        optimizer.zero_grad()\n","        policy_loss.backward()\n","        optimizer.step()\n","\n","        if i_episode % print_every == 0:\n","            print('Episode {}\\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))\n","\n","    return scores"]},{"cell_type":"markdown","metadata":{"id":"YB0Cxrw1StrP"},"source":["#### Solution"]},{"cell_type":"code","execution_count":146,"metadata":{"id":"NCNvyElRStWG","executionInfo":{"status":"ok","timestamp":1697194232794,"user_tz":-60,"elapsed":230,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def reinforce(policy, optimizer, n_training_episodes, max_t, gamma, print_every):\n","    # Help us to calculate the score during the training\n","    scores_deque = deque(maxlen=100)\n","    scores = []\n","    # Line 3 of pseudocode\n","    for i_episode in range(1, n_training_episodes+1):\n","        saved_log_probs = []\n","        rewards = []\n","        state = env.reset()\n","        # Line 4 of pseudocode\n","        for t in range(max_t):\n","            action, log_prob = policy.act(state)\n","            saved_log_probs.append(log_prob)\n","            state, reward, done, _ = env.step(action)\n","            rewards.append(reward)\n","            if done:\n","                break\n","        scores_deque.append(sum(rewards))\n","        scores.append(sum(rewards))\n","\n","        # Line 6 of pseudocode: calculate the return\n","        returns = deque(maxlen=max_t)\n","        n_steps = len(rewards)\n","        # Compute the discounted returns at each 
timestep,\n","        # as\n","        #      the gamma-discounted return of the next timestep + the reward of the current timestep\n","        #\n","        # In O(N) time, where N is the number of time steps\n","        # (this definition of the discounted return G_t follows the definition of this quantity\n","        # shown at page 44 of Sutton&Barto 2017 2nd draft)\n","        # G_t = r_(t+1) + r_(t+2) + ...\n","\n","        # Given this formulation, the returns at each timestep t can be computed\n","        # by re-using the computed future returns G_(t+1) to compute the current return G_t\n","        # G_t = r_(t+1) + gamma*G_(t+1)\n","        # G_(t-1) = r_t + gamma* G_t\n","        # (this follows a dynamic programming approach, in which we memoize solutions in order\n","        # to avoid computing them multiple times)\n","\n","        # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)\n","        # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...\n","\n","\n","        ## Given the above, we calculate the return at timestep t as:\n","        #               gamma * returns[t+1] + rewards[t]\n","        #\n","        ## We compute this starting from the last timestep to the first, in order\n","        ## to employ the formula presented above and avoid redundant computations that would be needed\n","        ## if we were to do it from first to last.\n","\n","        ## Hence, the queue \"returns\" will hold the returns in chronological order, from t=0 to t=n_steps-1,\n","        ## thanks to the appendleft() function, which inserts at position 0 in constant time O(1);\n","        ## a normal python list would instead require O(N) to do this.\n","        for t in range(n_steps)[::-1]:\n","            disc_return_t = (returns[0] if len(returns)>0 else 0)\n","            returns.appendleft( gamma*disc_return_t + rewards[t]   )\n","\n","        ## standardization of the returns is employed to make 
training more stable\n","        eps = np.finfo(np.float32).eps.item()\n","        ## eps is the float32 machine epsilon (the gap between 1.0 and the next representable value), which is\n","        # added to the standard deviation of the returns to avoid numerical instabilities (e.g. division by zero)\n","        returns = torch.tensor(returns)\n","        returns = (returns - returns.mean()) / (returns.std() + eps)\n","\n","        # Line 7:\n","        policy_loss = []\n","        for log_prob, disc_return in zip(saved_log_probs, returns):\n","            policy_loss.append(-log_prob * disc_return)\n","        policy_loss = torch.cat(policy_loss).sum()\n","\n","        # Line 8: PyTorch prefers gradient descent\n","        optimizer.zero_grad()\n","        policy_loss.backward()\n","        optimizer.step()\n","\n","        if i_episode % print_every == 0:\n","            print('Episode {}\\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))\n","\n","    return scores"]},{"cell_type":"markdown","metadata":{"id":"RIWhQyJjfpEt"},"source":["##  Train it\n","- We're now ready to train our agent.\n","- But first, we define a variable containing all the training hyperparameters.\n","- You can change the training parameters (and should 😉)"]},{"cell_type":"code","execution_count":198,"metadata":{"id":"utRe1NgtVBYF","executionInfo":{"status":"ok","timestamp":1697195245035,"user_tz":-60,"elapsed":2,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["cartpole_hyperparameters = {\n","    \"h_size\": 8, #16,\n","    \"n_training_episodes\": 500,\n","    \"n_evaluation_episodes\": 10,\n","    \"max_t\": 1000, #1000,\n","    \"gamma\": 1.0, #1.0,\n","    \"lr\": 1e-2, #1e-2,\n","    \"env_id\": env_id,\n","    \"state_space\": s_size,\n","    \"action_space\": a_size,\n","}"]},{"cell_type":"code","execution_count":199,"metadata":{"id":"D3lWyVXBVfl6","executionInfo":{"status":"ok","timestamp":1697195245276,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Create policy and place it to the device\n","cartpole_policy = Policy(cartpole_hyperparameters[\"state_space\"], cartpole_hyperparameters[\"action_space\"], cartpole_hyperparameters[\"h_size\"]).to(device)\n","cartpole_optimizer = optim.Adam(cartpole_policy.parameters(), lr=cartpole_hyperparameters[\"lr\"])"]},{"cell_type":"code","execution_count":200,"metadata":{"id":"uGf-hQCnfouB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697195416949,"user_tz":-60,"elapsed":171675,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"66295dd9-ba27-4535-92ec-f4488b1af637"},"outputs":[{"output_type":"stream","name":"stdout","text":["Episode 100\tAverage Score: 37.43\n","Episode 200\tAverage Score: 202.48\n","Episode 300\tAverage Score: 427.34\n","Episode 400\tAverage Score: 489.71\n","Episode 500\tAverage Score: 499.94\n"]}],"source":["scores = reinforce(cartpole_policy,\n","                   cartpole_optimizer,\n","                   cartpole_hyperparameters[\"n_training_episodes\"],\n","                   cartpole_hyperparameters[\"max_t\"],\n","                   cartpole_hyperparameters[\"gamma\"],\n","                   100)"]},{"cell_type":"markdown","metadata":{"id":"Qajj2kXqhB3g"},"source":["## Define evaluation method 📝\n","- Here we define the evaluation method that we're going to use to test our Reinforce agent."]},{"cell_type":"code","execution_count":201,"metadata":{"id":"3FamHmxyhBEU","executionInfo":{"status":"ok","timestamp":1697195430373,"user_tz":-60,"elapsed":217,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def evaluate_agent(env, max_steps, n_eval_episodes, policy):\n","  \"\"\"\n","  Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n","  :param env: The evaluation environment\n","  :param n_eval_episodes: Number of 
episode to evaluate the agent\n","  :param policy: The Reinforce agent\n","  \"\"\"\n","  episode_rewards = []\n","  for episode in range(n_eval_episodes):\n","    state = env.reset()\n","    step = 0\n","    done = False\n","    total_rewards_ep = 0\n","\n","    for step in range(max_steps):\n","      action, _ = policy.act(state)\n","      new_state, reward, done, info = env.step(action)\n","      total_rewards_ep += reward\n","\n","      if done:\n","        break\n","      state = new_state\n","    episode_rewards.append(total_rewards_ep)\n","  mean_reward = np.mean(episode_rewards)\n","  std_reward = np.std(episode_rewards)\n","\n","  return mean_reward, std_reward"]},{"cell_type":"markdown","metadata":{"id":"xdH2QCrLTrlT"},"source":["## Evaluate our agent 📈"]},{"cell_type":"code","execution_count":202,"metadata":{"id":"ohGSXDyHh0xx","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697195435253,"user_tz":-60,"elapsed":3743,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ddc020d3-561b-417f-a60e-fbaedda7606c"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["(500.0, 0.0)"]},"metadata":{},"execution_count":202}],"source":["evaluate_agent(eval_env,\n","               cartpole_hyperparameters[\"max_t\"],\n","               cartpole_hyperparameters[\"n_evaluation_episodes\"],\n","               cartpole_policy)"]},{"cell_type":"markdown","metadata":{"id":"7CoeLkQ7TpO8"},"source":["### Publish our trained model on the Hub 🔥\n","Now that we saw we got good results after the training, we can publish our trained model on the hub 🤗 with one line of code.\n","\n","Here's an example of a Model Card:\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit6/modelcard.png\"/>"]},{"cell_type":"markdown","metadata":{"id":"Jmhs1k-cftIq"},"source":["### Push to the Hub\n","#### Do not modify this 
code"]},{"cell_type":"code","source":["from huggingface_hub import HfApi, snapshot_download\n","from huggingface_hub.repocard import metadata_eval_result, metadata_save\n","\n","from pathlib import Path\n","import datetime\n","import json\n","import imageio\n","\n","import tempfile\n","\n","import os"],"metadata":{"id":"LIVsvlW_8tcw","executionInfo":{"status":"ok","timestamp":1697195439370,"user_tz":-60,"elapsed":230,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":203,"outputs":[]},{"cell_type":"code","execution_count":204,"metadata":{"id":"Lo4JH45if81z","executionInfo":{"status":"ok","timestamp":1697195440290,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def record_video(env, policy, out_directory, fps=30):\n","  \"\"\"\n","  Generate a replay video of the agent playing one episode\n","  :param env: environment to record; it is rendered with mode='rgb_array'\n","  :param policy: policy of our agent; its act(state) method chooses each action\n","  :param out_directory: path of the video file to write\n","  :param fps: how many frames per second the video is saved with\n","  \"\"\"\n","  images = []\n","  done = False\n","  state = env.reset()\n","  img = env.render(mode='rgb_array')\n","  images.append(img)\n","  while not done:\n","    # Ask the policy for the action to take in the current state\n","    action, _ = policy.act(state)\n","    state, reward, done, info = env.step(action) # We directly put next_state = state for recording logic\n","    img = env.render(mode='rgb_array')\n","    images.append(img)\n","  imageio.mimsave(out_directory, [np.array(img) for i, img in enumerate(images)], fps=fps)"]},{"cell_type":"code","source":["def push_to_hub(repo_id,\n","                model,\n","                hyperparameters,\n","                eval_env,\n","                video_fps=30\n","                ):\n","  \"\"\"\n","  Evaluate, Generate a video and Upload a model to Hugging Face Hub.\n","  This method does the complete pipeline:\n","  - It evaluates 
the model\n","  - It generates the model card\n","  - It generates a replay video of the agent\n","  - It pushes everything to the Hub\n","\n","  :param repo_id: repo_id: id of the model repository from the Hugging Face Hub\n","  :param model: the pytorch model we want to save\n","  :param hyperparameters: training hyperparameters\n","  :param eval_env: evaluation environment\n","  :param video_fps: how many frame per seconds to record our video replay\n","  \"\"\"\n","\n","  _, repo_name = repo_id.split(\"/\")\n","  api = HfApi()\n","\n","  # Step 1: Create the repo\n","  repo_url = api.create_repo(\n","        repo_id=repo_id,\n","        exist_ok=True,\n","  )\n","\n","  with tempfile.TemporaryDirectory() as tmpdirname:\n","    local_directory = Path(tmpdirname)\n","\n","    # Step 2: Save the model\n","    torch.save(model, local_directory / \"model.pt\")\n","\n","    # Step 3: Save the hyperparameters to JSON\n","    with open(local_directory / \"hyperparameters.json\", \"w\") as outfile:\n","      json.dump(hyperparameters, outfile)\n","\n","    # Step 4: Evaluate the model and build JSON\n","    mean_reward, std_reward = evaluate_agent(eval_env,\n","                                            hyperparameters[\"max_t\"],\n","                                            hyperparameters[\"n_evaluation_episodes\"],\n","                                            model)\n","    # Get datetime\n","    eval_datetime = datetime.datetime.now()\n","    eval_form_datetime = eval_datetime.isoformat()\n","\n","    evaluate_data = {\n","          \"env_id\": hyperparameters[\"env_id\"],\n","          \"mean_reward\": mean_reward,\n","          \"n_evaluation_episodes\": hyperparameters[\"n_evaluation_episodes\"],\n","          \"eval_datetime\": eval_form_datetime,\n","    }\n","\n","    # Write a JSON file\n","    with open(local_directory / \"results.json\", \"w\") as outfile:\n","        json.dump(evaluate_data, outfile)\n","\n","    # Step 5: Create the model card\n"," 
   env_name = hyperparameters[\"env_id\"]\n","\n","    metadata = {}\n","    metadata[\"tags\"] = [\n","          env_name,\n","          \"reinforce\",\n","          \"reinforcement-learning\",\n","          \"custom-implementation\",\n","          \"deep-rl-class\"\n","      ]\n","\n","    # Add metrics (named eval_metadata so the builtin eval() is not shadowed)\n","    eval_metadata = metadata_eval_result(\n","        model_pretty_name=repo_name,\n","        task_pretty_name=\"reinforcement-learning\",\n","        task_id=\"reinforcement-learning\",\n","        metrics_pretty_name=\"mean_reward\",\n","        metrics_id=\"mean_reward\",\n","        metrics_value=f\"{mean_reward:.2f} +/- {std_reward:.2f}\",\n","        dataset_pretty_name=env_name,\n","        dataset_id=env_name,\n","      )\n","\n","    # Merges both dictionaries\n","    metadata = {**metadata, **eval_metadata}\n","\n","    # Use env_name (read from the hyperparameters argument) rather than the notebook-global env_id,\n","    # so the card names the environment recorded with this model's hyperparameters\n","    model_card = f\"\"\"\n","  # **Reinforce** Agent playing **{env_name}**\n","  This is a trained model of a **Reinforce** agent playing **{env_name}** .\n","  To learn to use this model and train yours check Unit 4 of the Deep Reinforcement Learning Course: https://huggingface.co/deep-rl-course/unit4/introduction\n","  \"\"\"\n","\n","    readme_path = local_directory / \"README.md\"\n","    readme = \"\"\n","    if readme_path.exists():\n","        with readme_path.open(\"r\", encoding=\"utf8\") as f:\n","          readme = f.read()\n","    else:\n","      readme = model_card\n","\n","    with readme_path.open(\"w\", encoding=\"utf-8\") as f:\n","      f.write(readme)\n","\n","    # Save our metrics to Readme metadata\n","    metadata_save(readme_path, metadata)\n","\n","    # Step 6: Record a video\n","    # Record with the eval_env argument rather than the notebook-global env,\n","    # which may have been rebound to a different environment by a later cell\n","    video_path =  local_directory / \"replay.mp4\"\n","    record_video(eval_env, model, video_path, video_fps)\n","\n","    # Step 7. Push everything to the Hub\n","    api.upload_folder(\n","          repo_id=repo_id,\n","          folder_path=local_directory,\n","          path_in_repo=\".\",\n","    )\n","\n","    print(f\"Your model is pushed to the Hub. 
You can view your model here: {repo_url}\")"],"metadata":{"id":"_TPdq47D7_f_","executionInfo":{"status":"ok","timestamp":1697195441625,"user_tz":-60,"elapsed":231,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":205,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"w17w8CxzoURM"},"source":["### What `push_to_hub` does\n","\n","By using `push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the Hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"cWnFC0iZooTw"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in, then store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF 
Token\">\n"]},{"cell_type":"code","execution_count":122,"metadata":{"id":"QB5nIcxR8paT","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["182e3dedcd2745bcb549ea005e88315b","56cb4e7164164b14a704c0fea955a679","4486295dab3f442487a8907ea0f61426","aba1a189ce5b4e0bbf755c37e0a44414","2cff9183e49f4fe6909e933b78ffe1b7","ed30d1ff45e34f3b83742a384e48b080","2524853f299e4439b12e3f08234dfca3","a89fac821524462a959a595a8b2ce819","a77ed4102c4642a5b1f76e7cb906b767","47b89d52b8ce41bbb5d1488a4a1f9562","5c6128936cc34a0aa0d825ebe9b4a4b5","7a781ca81c874ca59499a90ecd8d2d99","4412d394eaae418b85dab9a264bb9806","9d323712621d4016bc5cb4abf73f3e24","08cc80b5cb0449ce8c8b341fa291ffef","e282c76d39f74bc581cb5b899ec558fd","820fc86b834d423caf3910b0c07ff5b4","372be77f740246989f83b976ba092dd8","4a01f1d1bb16429c967e5d01e06fac10","91492660cb7c421285d033d14629750b","d511a25b17854121add2cb3760fe6f80","e83bda293b434dfcbd15b94963f45ce8","d14bc31dde9045a284836cb202709ec8","20832517f0d345f98c66bd6fd9816a5a","7d535b0a18af42b2ab5c10414ae0db6a","3234299078f34c018e6a591ebe477a14","470ee07fa0b341cc9ed596609f9e51d0","b85c1108819a45f78813dd3a148b7b4c","edc6fd51cc4e48b29b4ff4cc3beaca87","dcc7e9fad3854158adfcbdcc9e4663df","02f7ce4ff5c74819998f63175c6720e3","2458132638eb4b6dab7c41b9322ffbae"]},"executionInfo":{"status":"ok","timestamp":1697193027490,"user_tz":-60,"elapsed":224,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"91575382-19cd-476d-cb16-a16964798656"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"182e3dedcd2745bcb549ea005e88315b"}},"metadata":{}}],"source":["notebook_login()"]},{"cell_type":"markdown","metadata":{"id":"GyWc1x3-o3xG"},"source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use 
this command instead: `huggingface-cli login` (or `login`)"]},{"cell_type":"markdown","metadata":{"id":"F-D-zhbRoeOm"},"source":["3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥 using the `push_to_hub()` function"]},{"cell_type":"code","execution_count":206,"metadata":{"id":"UNwkTS65Uq3Q","colab":{"base_uri":"https://localhost:8080/","height":158,"referenced_widgets":["9a8bc46b3df64f11b759ce9386050685","3ea63469a06e4710804b00dc5088f0bf","72ac1f6248b049c886d40a373195c947","14c0f88e629c4af3affe81114e5adb7d","b1d73b696e2e4b55a8c77f7e9f899a10","c61b7e7783644ec1814a25ad47c3e505","565f9c1738854ff198ebdccd9c7530ce","4350e62412d84530b2d97e7ace53c5bc","6d061a32800e4891ba896d9eb8ef0b5b","6862d17e5dcb4cbb8a76e01cd3b44ed0","d3985cded95449ffa71d3a8370fff913"]},"executionInfo":{"status":"ok","timestamp":1697195454522,"user_tz":-60,"elapsed":7797,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ceb4632f-533f-496a-a2fe-016a9ee713d1"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/gym/core.py:43: DeprecationWarning: \u001b[33mWARN: The argument mode in render method is deprecated; use render_mode during environment initialization instead.\n","See here for more information: https://www.gymlibrary.ml/content/api/\u001b[0m\n","  deprecation(\n","WARNING:imageio_ffmpeg:IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (600, 400) to (608, 400) to ensure video compatibility with most codecs and players. 
To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n"]},{"output_type":"display_data","data":{"text/plain":["model.pt:   0%|          | 0.00/2.58k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9a8bc46b3df64f11b759ce9386050685"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Your model is pushed to the Hub. You can view your model here: https://huggingface.co/jake-walker/Reinforce-CartPole-v1\n"]}],"source":["repo_id = \"jake-walker/Reinforce-CartPole-v1\" #TODO Define your repo id {username/Reinforce-{model-id}}\n","push_to_hub(repo_id,\n","                cartpole_policy, # The model we want to save\n","                cartpole_hyperparameters, # Hyperparameters\n","                eval_env, # Evaluation environment\n","                video_fps=30\n","                )"]},{"cell_type":"markdown","metadata":{"id":"jrnuKH1gYZSz"},"source":["Now that we try the robustness of our implementation, let's try a more complex environment: PixelCopter 🚁\n","\n","\n"]},{"cell_type":"markdown","source":["## Second agent: PixelCopter 🚁\n","\n","### Study the PixelCopter environment 👀\n","- [The Environment documentation](https://pygame-learning-environment.readthedocs.io/en/latest/user/games/pixelcopter.html)\n"],"metadata":{"id":"JNLVmKKVKA6j"}},{"cell_type":"code","execution_count":207,"metadata":{"id":"JBSc8mlfyin3","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697199598242,"user_tz":-60,"elapsed":560,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"6ba2541a-048d-480b-a55a-1e48adade706"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/gym/core.py:317: DeprecationWarning: \u001b[33mWARN: Initializing wrapper in old step API which returns one bool instead of two. 
It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.\u001b[0m\n","  deprecation(\n","/usr/local/lib/python3.10/dist-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: \u001b[33mWARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.\u001b[0m\n","  deprecation(\n"]}],"source":["env_id = \"Pixelcopter-PLE-v0\"\n","env = gym.make(env_id)\n","eval_env = gym.make(env_id)\n","s_size = env.observation_space.shape[0]\n","a_size = env.action_space.n"]},{"cell_type":"code","source":["print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"The State Space is: \", s_size)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"],"metadata":{"id":"L5u_zAHsKBy7","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697199684723,"user_tz":-60,"elapsed":6,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"bbf3dc28-0bb7-4d54-9d7f-51f2668c4de3"},"execution_count":208,"outputs":[{"output_type":"stream","name":"stdout","text":["_____OBSERVATION SPACE_____ \n","\n","The State Space is:  7\n","Sample observation [-0.8507907   0.00901644 -0.9717172  -0.19646257 -0.31501123 -1.0114805\n","  0.8149479 ]\n"]}]},{"cell_type":"code","source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"The Action Space is: \", a_size)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"],"metadata":{"id":"D7yJM9YXKNbq","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697199685652,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}},"outputId":"a634b0aa-d417-4a53-9886-09f7f31d6de1"},"execution_count":209,"outputs":[{"output_type":"stream","name":"stdout","text":["\n"," _____ACTION SPACE_____ \n","\n","The Action Space is:  2\n","Action Space Sample 0\n"]}]},{"cell_type":"markdown","metadata":{"id":"NNWvlyvzalXr"},"source":["The observation space (7) 👀:\n","- player y position\n","- player velocity\n","- player distance to floor\n","- player distance to ceiling\n","- next block x distance to player\n","- next blocks top y location\n","- next blocks bottom y location\n","\n","The action space(2) 🎮:\n","- Up (press accelerator)\n","- Do nothing (don't press accelerator)\n","\n","The reward function 💰:\n","- For each vertical block it passes through it gains a positive reward of +1. Each time a terminal state reached it receives a negative reward of -1."]},{"cell_type":"markdown","source":["### Define the new Policy 🧠\n","- We need to have a deeper neural network since the environment is more complex"],"metadata":{"id":"aV1466QP8crz"}},{"cell_type":"code","execution_count":210,"metadata":{"id":"I1eBkCiX2X_S","executionInfo":{"status":"ok","timestamp":1697199704948,"user_tz":-60,"elapsed":244,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["class Policy(nn.Module):\n","    def __init__(self, s_size, a_size, h_size):\n","        super(Policy, self).__init__()\n","        # Define the three layers here\n","        self.fc1 = nn.Linear(s_size, h_size)\n","        self.fc2 = nn.Linear(h_size, h_size*2)\n","        self.fc3 = nn.Linear(h_size*2, a_size)\n","\n","    def forward(self, x):\n","        # Define the forward process here\n","        x = F.relu(self.fc1(x))\n","        x = F.relu(self.fc2(x))\n","        x = self.fc3(x)\n","        return F.softmax(x, dim=1)\n","\n","    def act(self, state):\n","        state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n","        probs = 
self.forward(state).cpu()\n","        m = Categorical(probs)\n","        action = m.sample()\n","        return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","source":["#### Solution"],"metadata":{"id":"47iuAFqV8Ws-"}},{"cell_type":"code","source":["class Policy(nn.Module):\n","    def __init__(self, s_size, a_size, h_size):\n","        super(Policy, self).__init__()\n","        self.fc1 = nn.Linear(s_size, h_size)\n","        self.fc2 = nn.Linear(h_size, h_size*2)\n","        self.fc3 = nn.Linear(h_size*2, a_size)\n","\n","    def forward(self, x):\n","        x = F.relu(self.fc1(x))\n","        x = F.relu(self.fc2(x))\n","        x = self.fc3(x)\n","        return F.softmax(x, dim=1)\n","\n","    def act(self, state):\n","        state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n","        probs = self.forward(state).cpu()\n","        m = Categorical(probs)\n","        action = m.sample()\n","        return action.item(), m.log_prob(action)"],"metadata":{"id":"wrNuVcHC8Xu7"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"SM1QiGCSbBkM"},"source":["### Define the hyperparameters ⚙️\n","- Because this environment is more complex.\n","- Especially for the hidden size, we need more neurons."]},{"cell_type":"code","execution_count":278,"metadata":{"id":"y0uujOR_ypB6","executionInfo":{"status":"ok","timestamp":1697204043734,"user_tz":-60,"elapsed":238,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["pixelcopter_hyperparameters = {\n","    \"h_size\": 64, # 64\n","    \"n_training_episodes\": 10000,\n","    \"n_evaluation_episodes\": 10,\n","    \"max_t\": 10000, # 10000\n","    \"gamma\": 0.99, # 0.99\n","    \"lr\": 1e-4, # 1e-4\n","    \"env_id\": env_id,\n","    \"state_space\": s_size,\n","    \"action_space\": a_size,\n","}"]},{"cell_type":"markdown","source":["###  Train it\n","- We're now ready to train our agent 
# --- Cell: build the policy and its optimizer ---------------------------------
# Instantiate the PixelCopter policy from the configured sizes and move it to `device`.
# Seeding is currently disabled; uncomment to seed PyTorch's RNG:
# torch.manual_seed(50)
pixelcopter_policy = Policy(
    *(pixelcopter_hyperparameters[key] for key in ("state_space", "action_space", "h_size"))
).to(device)
pixelcopter_optimizer = optim.Adam(
    pixelcopter_policy.parameters(),
    lr=pixelcopter_hyperparameters["lr"],
)

# --- Cell: train --------------------------------------------------------------
# The trailing 1000 is passed positionally; the recorded output shows one
# "Average Score" line per 1000 episodes, so it is presumably the reporting
# interval (confirm against reinforce's signature).
scores = reinforce(
    pixelcopter_policy,
    pixelcopter_optimizer,
    *(pixelcopter_hyperparameters[key] for key in ("n_training_episodes", "max_t", "gamma")),
    1000,
)

# --- Cell: evaluate -----------------------------------------------------------
# Kept as the cell's final bare expression so the notebook displays the result
# (a 2-tuple in the recorded run).
evaluate_agent(
    eval_env,
    *(pixelcopter_hyperparameters[key] for key in ("max_t", "n_evaluation_episodes")),
    pixelcopter_policy,
)

# --- Cell: publish ------------------------------------------------------------
repo_id = "jake-walker/Reinforce-PixelCopter" #TODO Define your repo id {username/Reinforce-{model-id}}
push_to_hub(
    repo_id,
    pixelcopter_policy,  # The model we want to save
    pixelcopter_hyperparameters,  # Hyperparameters
    eval_env,  # Evaluation environment
    video_fps=30,
)
information: https://www.gymlibrary.ml/content/api/\u001b[0m\n","  deprecation(\n"]},{"output_type":"display_data","data":{"text/plain":["model.pt:   0%|          | 0.00/39.2k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2949c6cee3e54b1983894fb28acb6f9c"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Your model is pushed to the Hub. You can view your model here: https://huggingface.co/jake-walker/Reinforce-PixelCopter\n"]}]},{"cell_type":"markdown","metadata":{"id":"7VDcJ29FcOyb"},"source":["## Some additional challenges 🏆\n","The best way to learn **is to try things on your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. You can also try to find better hyperparameters.\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n","\n","Here are some ideas to achieve this:\n","* Train for more steps\n","* Try different hyperparameters by looking at what your classmates have done 👉 https://huggingface.co/models?other=reinforce\n","* **Push your newly trained model** to the Hub 🔥\n","* **Improve the implementation for more complex environments** (for instance, what about changing the network to a Convolutional Neural Network to handle\n","frames as observations)?"]},{"cell_type":"markdown","metadata":{"id":"x62pP0PHdA-y"},"source":["________________________________________________________________________\n","\n","**Congrats on finishing this unit**! There was a lot of information.\n","And congrats on finishing the tutorial. 
You've just coded your first Deep Reinforcement Learning agent from scratch using PyTorch and shared it on the Hub 🥳.\n","\n","Don't hesitate to iterate on this unit **by improving the implementation for more complex environments** (for instance, what about changing the network to a Convolutional Neural Network to handle\n","frames as observations)?\n","\n","In the next unit, **we're going to learn more about Unity ML-Agents**, by training agents in Unity environments. This way, you will be ready to participate in the **AI vs AI challenges where you'll train your agents\n","to compete against other agents in a snowball fight and a soccer game.**\n","\n","Sounds fun? See you next time!\n","\n","Finally, we would love **to hear what you think of the course and how we can improve it**. If you have some feedback, then please 👉 [fill out this form](https://forms.gle/BzKXWzLAGZESGNaE9)\n","\n","See you in Unit 5! 🔥\n","\n","### Keep Learning, stay awesome 🤗\n","\n"]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit4/unit4.ipynb","timestamp":1697189385734}],"collapsed_sections":["BPLwsPajb1f8","L_WSo0VUV99t","mjY-eq3eWh9O","JoTC9o2SczNn","rOMrdwSYOWSC","gfGJNZBUP7Vn","YB0Cxrw1StrP","47iuAFqV8Ws-","x62pP0PHdA-y"],"gpuType":"T4"},"kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.10"},"widgets":{"application/vnd.jupyter.widget-state+json":{"182e3dedcd2745bcb549ea005e88315b":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_d511a25b17854121add2cb3760fe6f80","IPY_MODEL_e83bda293b434dfcbd15b94963f45ce8","IPY_MODEL_d14bc31dde9045a284836cb202709ec8","IPY_MODEL_20832517f0d345f98c66bd6fd9816a5a"],"layout":"IPY_MODEL_2524853f299e4439b12e3f08234dfca3"}},"56cb4e7164164b14a704c0fea955a679":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a89fac821524462a959a595a8b2ce819","placeholder":"​","style":"IPY_MODEL_a77ed4102c4642a5b1f76e7cb906b767","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. 
</center>"}},"4486295dab3f442487a8907ea0f61426":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_47b89d52b8ce41bbb5d1488a4a1f9562","placeholder":"​","style":"IPY_MODEL_5c6128936cc34a0aa0d825ebe9b4a4b5","value":""}},"aba1a189ce5b4e0bbf755c37e0a44414":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_7a781ca81c874ca59499a90ecd8d2d99","style":"IPY_MODEL_4412d394eaae418b85dab9a264bb9806","value":true}},"2cff9183e49f4fe6909e933b78ffe1b7":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_9d323712621d4016bc5cb4abf73f3e24","style":"IPY_MODEL_08cc80b5cb0449ce8c8b341fa291ffef","tooltip":""}},"ed30d1ff45e34f3b83742a384e48b080":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e282c76d39f74bc581cb5b899ec558fd","placeholder":"​","style":"IPY_MODEL_820fc86b834d423caf3910b0c07ff5b4","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"2524853f299e4439b12e3f08234dfca3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"a89fac821524462a959a595a8b2ce819":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a77ed4102c4642a5b1f76e7cb906b767":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"47b89d52b8ce41bbb5d1488a4a1f9562":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5c6128936cc34a0aa0d825ebe9b4a4b5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7a781ca81c874ca59499a90ecd8d2d99":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4412d394eaae418b85dab9a264bb9806":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9d323712621d4016bc5cb4abf73f3e24":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"08cc80b5cb0449ce8c8b341fa291ffef":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"e282c76d39f74bc581cb5b899ec558fd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"820fc86b834d423caf3910b0c07ff5b4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"372be77f740246989f83b976ba092dd8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4a01f1d1bb16429c967e5d01e06fac10","placeholder":"​","style":"IPY_MODEL_91492660cb7c421285d033d14629750b","value":"Connecting..."}},"4a01f1d1bb16429c967e5d01e06fac10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"91492660cb7c421285d033d14629750b":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d511a25b17854121add2cb3760fe6f80":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d535b0a18af42b2ab5c10414ae0db6a","placeholder":"​","style":"IPY_MODEL_3234299078f34c018e6a591ebe477a14","value":"Token is valid (permission: write)."}},"e83bda293b434dfcbd15b94963f45ce8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_470ee07fa0b341cc9ed596609f9e51d0","placeholder":"​","style":"IPY_MODEL_b85c1108819a45f78813dd3a148b7b4c","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"d14bc31dde9045a284836cb202709ec8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_edc6fd51cc4e48b29b4ff4cc3beaca87","placeholder":"​","style":"IPY_MODEL_dcc7e9fad3854158adfcbdcc9e4663df","value":"Your token has been saved to /root/.cache/huggingface/token"}},"20832517f0d345f98c66bd6fd9816a5a":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_02f7ce4ff5c74819998f63175c6720e3","placeholder":"​","style":"IPY_MODEL_2458132638eb4b6dab7c41b9322ffbae","value":"Login 
successful"}},"7d535b0a18af42b2ab5c10414ae0db6a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3234299078f34c018e6a591ebe477a14":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"470ee07fa0b341cc9ed596609f9e51d0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b85c1108819a45f78813dd3a148b7b4c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"edc6fd51cc4e48b29b4ff4cc3beaca87":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dcc7e9fad3854158adfcbdcc9e4663df":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"02f7ce4ff5c74819998f63175c6720e3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2458132638eb4b6dab7c41b9322ffbae":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9a8bc46b3df64f11b759ce9386050685":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3ea63469a06e4710804b00dc5088f0bf","IPY_MODEL_72ac1f6248b049c886d40a373195c947","IPY_MODEL_14c0f88e629c4af3affe81114e5adb7d"],"layout":"IPY_MODEL_b1d73b696e2e4b55a8c77f7e9f899a10"}},"3ea63469a06e4710804b00dc5088f0bf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c61b7e7783644ec1814a25ad47c3e505","placeholder":"​","style":"IPY_MODEL_565f9c1738854ff198ebdccd9c7530ce","value":"model.pt: 100%"}},"72ac1f6248b049c886d40a373195c947":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4350e62412d84530b2d97e7ace53c5bc","max":2579,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6d061a32800e4891ba896d9eb8ef0b5b","value":2579}},"14c0f88e629c4af3affe81114e5adb7d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","de
scription":"","description_tooltip":null,"layout":"IPY_MODEL_6862d17e5dcb4cbb8a76e01cd3b44ed0","placeholder":"​","style":"IPY_MODEL_d3985cded95449ffa71d3a8370fff913","value":" 2.58k/2.58k [00:00&lt;00:00, 8.13kB/s]"}},"b1d73b696e2e4b55a8c77f7e9f899a10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c61b7e7783644ec1814a25ad47c3e505":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"
justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"565f9c1738854ff198ebdccd9c7530ce":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4350e62412d84530b2d97e7ace53c5bc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d061a32800e4891ba896d9eb8ef0b5b":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleMo
del","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"6862d17e5dcb4cbb8a76e01cd3b44ed0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d3985cded95449ffa71d3a8370fff913":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2949c6cee3e54b1983894fb28acb6f9c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style"
:"","children":["IPY_MODEL_74d9fae5441d42c8a645fa183148e2e5","IPY_MODEL_84a93b3f5ff74ee99bc85beb5af76107","IPY_MODEL_a39be9f762244f1fa2f0a6bcedf72a36"],"layout":"IPY_MODEL_a6dcf9190aaf4d7d8ea521e2d0b5b0f9"}},"74d9fae5441d42c8a645fa183148e2e5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_96558faa30944d8eac42935704ea9486","placeholder":"​","style":"IPY_MODEL_a6f10d6908884133b9754963af735566","value":"model.pt: 100%"}},"84a93b3f5ff74ee99bc85beb5af76107":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4434df81c6834371b5e6e8aac994d146","max":39239,"min":0,"orientation":"horizontal","style":"IPY_MODEL_20e08af188c54eda8e5fb8632c0e924e","value":39239}},"a39be9f762244f1fa2f0a6bcedf72a36":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_01990cbfe9fd4b58841147771799268e","placeholder":"​","style":"IPY_MODEL_ed60418f04294ca3b0f58c0891be7006","value":" 39.2k/39.2k 
[00:00&lt;00:00, 52.6kB/s]"}},"a6dcf9190aaf4d7d8ea521e2d0b5b0f9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"96558faa30944d8eac42935704ea9486":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"o
verflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a6f10d6908884133b9754963af735566":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4434df81c6834371b5e6e8aac994d146":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"20e08af188c54eda8e5fb8632c0e924e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"01990cbfe9fd4b588411477717
99268e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ed60418f04294ca3b0f58c0891be7006":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
diff --git a/HF DeepRL Course/Unit5 - ML-Agents.ipynb b/HF DeepRL Course/Unit5 - ML-Agents.ipynb
new file mode 100644
index 0000000..0e1d1dc
--- /dev/null
+++ b/HF DeepRL Course/Unit5 - ML-Agents.ipynb	
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"2D3NL_e4crQv"},"source":["# Unit 5: An Introduction to ML-Agents\n","\n"]},{"cell_type":"markdown","source":["<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/thumbnail.png\" alt=\"Thumbnail\"/>\n","\n","In this notebook, you'll learn about ML-Agents and train two agents.\n","\n","- The first one will learn to **shoot snowballs onto spawning targets**.\n","- The second needs to press a button to spawn a pyramid, then navigate to the pyramid, knock it over, **and move to the gold brick at the top**. To do that, it will need to explore its environment, and we will use a technique called curiosity.\n","\n","After that, you'll be able **to watch your agents playing directly in your browser**.\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"97ZiytXEgqIz"}},{"cell_type":"markdown","source":["⬇️ Here is an example of what **you will achieve at the end of this unit.** ⬇️\n"],"metadata":{"id":"FMYrDriDujzX"}},{"cell_type":"markdown","source":["<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/pyramids.gif\" alt=\"Pyramids\"/>\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/snowballtarget.gif\" alt=\"SnowballTarget\"/>"],"metadata":{"id":"cBmFlh8suma-"}},{"cell_type":"markdown","source":["### 🎮 Environments:\n","\n","- [Pyramids](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md#pyramids)\n","- SnowballTarget\n","\n","### 📚 RL-Library:\n","\n","- [ML-Agents](https://github.com/Unity-Technologies/ml-agents)\n"],"metadata":{"id":"A-cYE0K5iL-w"}},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this 
notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"qEhtaFh9i31S"}},{"cell_type":"markdown","source":["## Objectives of this notebook 🏆\n","\n","At the end of the notebook, you will:\n","\n","- Understand how **ML-Agents**, the environment library, works.\n","- Be able to **train agents in Unity Environments**.\n"],"metadata":{"id":"j7f63r3Yi5vE"}},{"cell_type":"markdown","source":["## This notebook is from the Deep Reinforcement Learning Course\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>"],"metadata":{"id":"viNzVbVaYvY3"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more! Check 📚 the syllabus 👉 https://huggingface.co/deep-rl-course/communication/publishing-schedule\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites 🏗️\n","Before diving into the notebook, you need to:\n","\n","🔲 📚 **Study [what is ML-Agents and how it works by reading Unit 5](https://huggingface.co/deep-rl-course/unit5/introduction)**  🤗  "]},{"cell_type":"markdown","source":["# Let's train our agents 🚀\n","\n","**To validate this hands-on for 
the certification process, you just need to push your trained models to the Hub**. There are no results to attain to validate this one. But if you want to get nice results you can try to attain:\n","\n","- For `Pyramids` : Mean Reward = 1.75\n","- For `SnowballTarget` : Mean Reward = 15 or 30 targets hit in an episode.\n"],"metadata":{"id":"xYO1uD5Ujgdh"}},{"cell_type":"markdown","source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"],"metadata":{"id":"DssdIjk_8vZE"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"],"metadata":{"id":"sTfCXHy68xBv"}},{"cell_type":"markdown","metadata":{"id":"an3ByrXYQ4iK"},"source":["## Clone the repository and install the dependencies 🔽\n"]},{"cell_type":"code","execution_count":1,"metadata":{"id":"6WNoL04M7rTa","executionInfo":{"status":"ok","timestamp":1697361786876,"user_tz":-60,"elapsed":7400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["%%capture\n","# Clone the repository\n","!git clone --depth 1 https://github.com/Unity-Technologies/ml-agents"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"d8wmVcMk7xKo"},"outputs":[],"source":["%%capture\n","# Go inside the repository and install the package\n","%cd ml-agents\n","!pip3 install -e ./ml-agents-envs\n","!pip3 install -e ./ml-agents"]},{"cell_type":"markdown","source":["## SnowballTarget ⛄\n","\n","If you need a refresher on how this environment works, check this section 
👉\n","https://huggingface.co/deep-rl-course/unit5/snowball-target"],"metadata":{"id":"R5_7Ptd_kEcG"}},{"cell_type":"markdown","metadata":{"id":"HRY5ufKUKfhI"},"source":["### Download and move the environment zip file in `./training-envs-executables/linux/`\n","- Our environment executable is in a zip file.\n","- We need to download it and place it to `./training-envs-executables/linux/`\n","- We use a linux executable because we use colab, and colab machines OS is Ubuntu (linux)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"C9Ls6_6eOKiA"},"outputs":[],"source":["# Here, we create training-envs-executables and linux\n","!mkdir ./training-envs-executables\n","!mkdir ./training-envs-executables/linux"]},{"cell_type":"markdown","metadata":{"id":"jsoZGxr1MIXY"},"source":["Download the file SnowballTarget.zip from https://drive.google.com/file/d/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5 using `wget`.\n","\n","Check out the full solution to download large files from GDrive [here](https://bcrf.biochem.wisc.edu/2021/02/05/download-google-drive-files-using-wget/)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QU6gi8CmWhnA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697206519710,"user_tz":-60,"elapsed":1990,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"8af5d19f-b177-4831-acf2-c8c18b94668a"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2023-10-13 14:15:18--  https://docs.google.com/uc?export=download&confirm=t&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5\n","Resolving docs.google.com (docs.google.com)... 74.125.142.100, 74.125.142.101, 74.125.142.138, ...\n","Connecting to docs.google.com (docs.google.com)|74.125.142.100|:443... connected.\n","HTTP request sent, awaiting response... 
303 See Other\n","Location: https://doc-14-28-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9qrrn3l294p7iisdst09cdhp9igi3ghk/1697206500000/15803371278684422230/*/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5?e=download&uuid=db620364-aaaf-4a37-9e6a-24defb0a225a [following]\n","Warning: wildcards not supported in HTTP.\n","--2023-10-13 14:15:18--  https://doc-14-28-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9qrrn3l294p7iisdst09cdhp9igi3ghk/1697206500000/15803371278684422230/*/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5?e=download&uuid=db620364-aaaf-4a37-9e6a-24defb0a225a\n","Resolving doc-14-28-docs.googleusercontent.com (doc-14-28-docs.googleusercontent.com)... 142.250.99.132, 2607:f8b0:400e:c0c::84\n","Connecting to doc-14-28-docs.googleusercontent.com (doc-14-28-docs.googleusercontent.com)|142.250.99.132|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 35134213 (34M) [application/x-zip-compressed]\n","Saving to: ‘./training-envs-executables/linux/SnowballTarget.zip’\n","\n","./training-envs-exe 100%[===================>]  33.51M  53.3MB/s    in 0.6s    \n","\n","2023-10-13 14:15:19 (53.3 MB/s) - ‘./training-envs-executables/linux/SnowballTarget.zip’ saved [35134213/35134213]\n","\n"]}],"source":["!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5\" -O ./training-envs-executables/linux/SnowballTarget.zip && rm -rf /tmp/cookies.txt"]},{"cell_type":"markdown","source":["We unzip the executable.zip file"],"metadata":{"id":"_LLVaEEK3ayi"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"8FPx0an9IAwO"},"outputs":[],"source":["%%capture\n","!unzip -d ./training-envs-executables/linux/ 
./training-envs-executables/linux/SnowballTarget.zip"]},{"cell_type":"markdown","metadata":{"id":"nyumV5XfPKzu"},"source":["Make sure your file is accessible"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"EdFsLJ11JvQf"},"outputs":[],"source":["!chmod -R 755 ./training-envs-executables/linux/SnowballTarget"]},{"cell_type":"markdown","source":["### Define the SnowballTarget config file\n","- In ML-Agents, you define the **training hyperparameters in config.yaml files.**\n","\n","There are multiple hyperparameters. To understand them better, check the explanation of each one in [the documentation](https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Training-Configuration-File.md)\n","\n","\n","So you need to create a `SnowballTarget.yaml` config file in `./config/ppo/` inside the cloned `ml-agents` repository (`/content/ml-agents/config/ppo/` on Colab)\n","\n","We'll give you here a first version of this config (to copy and paste into your `SnowballTarget.yaml` file), **but you should modify it**.\n","\n","```\n","behaviors:\n","  SnowballTarget:\n","    trainer_type: ppo\n","    summary_freq: 10000\n","    keep_checkpoints: 10\n","    checkpoint_interval: 50000\n","    max_steps: 200000\n","    time_horizon: 64\n","    threaded: true\n","    hyperparameters:\n","      learning_rate: 0.0003\n","      learning_rate_schedule: linear\n","      batch_size: 128\n","      buffer_size: 2048\n","      beta: 0.005\n","      epsilon: 0.2\n","      lambd: 0.95\n","      num_epoch: 3\n","    network_settings:\n","      normalize: false\n","      hidden_units: 256\n","      num_layers: 2\n","      vis_encode_type: simple\n","    reward_signals:\n","      extrinsic:\n","        gamma: 0.99\n","        strength: 1.0\n","```"],"metadata":{"id":"NAuEq32Mwvtz"}},{"cell_type":"markdown","source":["<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/snowballfight_config1.png\" alt=\"Config SnowballTarget\"/>\n","<img 
src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/snowballfight_config2.png\" alt=\"Config SnowballTarget\"/>"],"metadata":{"id":"4U3sRH4N4h_l"}},{"cell_type":"markdown","source":["As an experiment, you should also try to modify some other hyperparameters. Unity provides very [good documentation explaining each of them here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md).\n","\n","Now that you've created the config file and understand what most hyperparameters do, we're ready to train our agent 🔥."],"metadata":{"id":"JJJdo_5AyoGo"}},{"cell_type":"markdown","metadata":{"id":"f9fI555bO12v"},"source":["### Train the agent\n","\n","To train our agent, we just need to **launch mlagents-learn and select the executable containing the environment.**\n","\n","We define four parameters:\n","\n","1. `mlagents-learn <config>`: the path where the hyperparameter config file is.\n","2. `--env`: where the environment executable is.\n","3. `--run-id`: the name you want to give to your training run.\n","4. 
`--no-graphics`: to not launch the visualization during the training.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/mlagentslearn.png\" alt=\"MlAgents learn\"/>\n","\n","Train the model and use the `--resume` flag to continue training in case of interruption.\n","\n","> It will fail first time if and when you use `--resume`, try running the block again to bypass the error.\n","\n"]},{"cell_type":"markdown","source":["The training will take 10 to 35min depending on your config, go take a ☕️you deserve it 🤗."],"metadata":{"id":"lN32oWF8zPjs"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"bS-Yh1UdHfzy","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207065541,"user_tz":-60,"elapsed":449500,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ab21b023-f1d1-44f4-bfb9-12d26355902d"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-13 14:17:00.914435: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","\n","            ┐  ╖\n","        ╓╖╬│╡  ││╬╖╖\n","    ╓╖╬│││││┘  ╬│││││╬╖\n"," ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗\n"," ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣\n"," ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣\n"," ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣\n"," ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜\n"," ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣\n"," ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣\n","   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                  
                           ╫╣╣╣╣\n","      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜\n","          ╙╬╬╬╣╣╣╜\n","             ╙\n","        \n"," Version information:\n","  ml-agents: 1.1.0.dev0,\n","  ml-agents-envs: 1.1.0.dev0,\n","  Communicator API: 1.5.0,\n","  PyTorch: 2.0.1+cu118\n","[INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0\n","[INFO] Connected new brain: SnowballTarget?team=0\n","[INFO] Hyperparameters for behavior name SnowballTarget: \n","\ttrainer_type:\tppo\n","\thyperparameters:\t\n","\t  batch_size:\t128\n","\t  buffer_size:\t2048\n","\t  learning_rate:\t0.0003\n","\t  beta:\t0.005\n","\t  epsilon:\t0.2\n","\t  lambd:\t0.95\n","\t  num_epoch:\t3\n","\t  shared_critic:\tFalse\n","\t  learning_rate_schedule:\tlinear\n","\t  beta_schedule:\tlinear\n","\t  epsilon_schedule:\tlinear\n","\tcheckpoint_interval:\t50000\n","\tnetwork_settings:\t\n","\t  normalize:\tFalse\n","\t  hidden_units:\t256\n","\t  num_layers:\t2\n","\t  vis_encode_type:\tsimple\n","\t  memory:\tNone\n","\t  goal_conditioning_type:\thyper\n","\t  deterministic:\tFalse\n","\treward_signals:\t\n","\t  extrinsic:\t\n","\t    gamma:\t0.99\n","\t    strength:\t1.0\n","\t    network_settings:\t\n","\t      normalize:\tFalse\n","\t      hidden_units:\t128\n","\t      num_layers:\t2\n","\t      vis_encode_type:\tsimple\n","\t      memory:\tNone\n","\t      goal_conditioning_type:\thyper\n","\t      deterministic:\tFalse\n","\tinit_path:\tNone\n","\tkeep_checkpoints:\t10\n","\teven_checkpoints:\tFalse\n","\tmax_steps:\t200000\n","\ttime_horizon:\t64\n","\tsummary_freq:\t10000\n","\tthreaded:\tTrue\n","\tself_play:\tNone\n","\tbehavioral_cloning:\tNone\n","[INFO] SnowballTarget. Step: 10000. Time Elapsed: 32.631 s. Mean Reward: 3.659. Std of Reward: 2.235. Training.\n","[INFO] SnowballTarget. Step: 20000. Time Elapsed: 54.320 s. Mean Reward: 5.909. Std of Reward: 2.856. Training.\n","[INFO] SnowballTarget. Step: 30000. Time Elapsed: 74.734 s. 
Mean Reward: 9.364. Std of Reward: 2.672. Training.\n","[INFO] SnowballTarget. Step: 40000. Time Elapsed: 95.906 s. Mean Reward: 10.964. Std of Reward: 2.635. Training.\n","[INFO] SnowballTarget. Step: 50000. Time Elapsed: 117.648 s. Mean Reward: 13.364. Std of Reward: 2.297. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.onnx\n","[INFO] SnowballTarget. Step: 60000. Time Elapsed: 139.233 s. Mean Reward: 15.473. Std of Reward: 2.456. Training.\n","[INFO] SnowballTarget. Step: 70000. Time Elapsed: 160.278 s. Mean Reward: 16.568. Std of Reward: 2.199. Training.\n","[INFO] SnowballTarget. Step: 80000. Time Elapsed: 183.213 s. Mean Reward: 18.618. Std of Reward: 2.576. Training.\n","[INFO] SnowballTarget. Step: 90000. Time Elapsed: 203.479 s. Mean Reward: 19.977. Std of Reward: 2.127. Training.\n","[INFO] SnowballTarget. Step: 100000. Time Elapsed: 225.885 s. Mean Reward: 19.782. Std of Reward: 2.172. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.onnx\n","[INFO] SnowballTarget. Step: 110000. Time Elapsed: 247.541 s. Mean Reward: 20.370. Std of Reward: 2.429. Training.\n","[INFO] SnowballTarget. Step: 120000. Time Elapsed: 268.255 s. Mean Reward: 21.444. Std of Reward: 2.809. Training.\n","[INFO] SnowballTarget. Step: 130000. Time Elapsed: 290.602 s. Mean Reward: 22.727. Std of Reward: 2.720. Training.\n","[INFO] SnowballTarget. Step: 140000. Time Elapsed: 312.492 s. Mean Reward: 23.318. Std of Reward: 2.419. Training.\n","[INFO] SnowballTarget. 
Step: 150000. Time Elapsed: 335.058 s. Mean Reward: 24.236. Std of Reward: 2.106. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-149984.onnx\n","[INFO] SnowballTarget. Step: 160000. Time Elapsed: 356.424 s. Mean Reward: 24.318. Std of Reward: 2.678. Training.\n","[INFO] SnowballTarget. Step: 170000. Time Elapsed: 378.086 s. Mean Reward: 24.818. Std of Reward: 2.241. Training.\n","[INFO] SnowballTarget. Step: 180000. Time Elapsed: 399.156 s. Mean Reward: 24.659. Std of Reward: 2.215. Training.\n","[INFO] SnowballTarget. Step: 190000. Time Elapsed: 421.498 s. Mean Reward: 25.255. Std of Reward: 2.290. Training.\n","[INFO] SnowballTarget. Step: 200000. Time Elapsed: 441.510 s. Mean Reward: 24.955. Std of Reward: 2.296. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-199984.onnx\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-200112.onnx\n","[INFO] Copied results/SnowballTarget1/SnowballTarget/SnowballTarget-200112.onnx to results/SnowballTarget1/SnowballTarget.onnx.\n"]}],"source":["!mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=\"SnowballTarget1\" --no-graphics"]},{"cell_type":"markdown","metadata":{"id":"5Vue94AzPy1t"},"source":["### Push the agent 
to the 🤗 Hub\n","\n","- Now that we trained our agent, we’re **ready to push it to the Hub to be able to visualize it playing on your browser🔥.**"]},{"cell_type":"markdown","source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account to HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">\n","\n","- Copy the token\n","- Run the cell below and paste the token"],"metadata":{"id":"izT6FpgNzZ6R"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"rKt2vsYoK56o","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["4428338482f948e6be752e8e9827c841","a37b769ce02840ed9b83bbde4f4daac7","197f3a0f7b2b4618aaa2c0abcc121768","0de73d47456740c8a7aec3a2c23429e4","dbb882b2352b44cb8d0a7646b3ef0a96","48c1ef961a6942199d437ba1b8456f6f","357cfe11cb4f458bb70372a6d1209262","ae25137c62ab4c6aa00ea707937c68bd","2196e1264505454098d83d5a986c122f","ebda32c0c8d94222b1bd72cedab9a01c","b03c362004d24d65aa07a298d9cbf7d6","1d64929e8b8d4dc18a2175c82567e1c7","ac0b7664442f4c68bd72781ddb87ea9e","47893006c1f54e96adaffd43521d4c16","83fd942f1b9e4df09cf684609dec4ee7","80c35c2596fe4098b77e9ab7b4afba34","3a38f167af43410b9dc9f279cc7b5a01","d54984153efa4edc957d350eda7d8f93","2f90a2040a854b83a2ec49155655c495","22da873132224b989e009434a3369a68","249ca35c9a7946d2b4f17f6c3eddc0ab","c50f669c6c6d4033888d1e4cf549b7ca","398634082d6c4fd49b0b518b6c20e3e3","d3c41cdcff7d4056a45b1894f83bfcb4","f4979c13430b456ea912de1c06712416","95fda6ca627a40f6a0cea7450dff1ba4","c2b4330ea8b9461ca1b137d100988f13","44ac7a9c241c47ec97260fbed13aa199","243b25a12f2442049b65f25b6154e07c","
bc379cef55ae4a70973b5ff21a867e5a","280358528f3b481f9dac3af422f88aff","abf0998b051f4b5f9d9098fe3a7867be"]},"executionInfo":{"status":"ok","timestamp":1697207093945,"user_tz":-60,"elapsed":335,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"54143fc9-86f6-4a10-e300-16ee9e514911"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4428338482f948e6be752e8e9827c841"}},"metadata":{}}],"source":["from huggingface_hub import notebook_login\n","notebook_login()"]},{"cell_type":"markdown","source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"],"metadata":{"id":"aSU9qD9_6dem"}},{"cell_type":"markdown","source":["Then, we simply need to run `mlagents-push-to-hf`.\n","\n","And we define 4 parameters:\n","\n","1. `--run-id`: the id of the training run.\n","2. `--local-dir`: where the agent was saved, it’s `results/<run_id name>`, so in my case `results/SnowballTarget1`.\n","3. `--repo-id`: the name of the Hugging Face repo you want to create or update. It’s always `<your huggingface username>/<the repo name>`.\n","If the repo does not exist **it will be created automatically**\n","4. 
`--commit-message`: since HF repos are git repository you need to define a commit message.\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/mlagentspushtohub.png\" alt=\"Push to Hub\"/>\n","\n","For instance:\n","\n","`!mlagents-push-to-hf  --run-id=\"SnowballTarget1\" --local-dir=\"./results/SnowballTarget1\" --repo-id=\"ThomasSimonini/ppo-SnowballTarget\"  --commit-message=\"First Push\"`"],"metadata":{"id":"KK4fPfnczunT"}},{"cell_type":"code","source":["!mlagents-push-to-hf --run-id=\"SnowballTarget1\" --local-dir=\"./results/SnowballTarget1\" --repo-id=\"ThomasSimonini/ppo-SnowballTarget\" --commit-message=\"First Push\""],"metadata":{"id":"kAFzVB7OYj_H"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"dGEFAIboLVc6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207191747,"user_tz":-60,"elapsed":4408,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"412789af-62ba-4ba7-9ab5-dd4087df896d"},"outputs":[{"output_type":"stream","name":"stdout","text":["[INFO] This function will create a model card and upload your SnowballTarget1 into HuggingFace Hub. 
This is a work in progress: If you encounter a bug, please send open an issue\n","[INFO] Pushing repo SnowballTarget1 to the Hugging Face Hub\n","SnowballTarget-149984.onnx:   0% 0.00/651k [00:00<?, ?B/s]\n","SnowballTarget-149984.pt:   0% 0.00/3.85M [00:00<?, ?B/s]\u001b[A\n","\n","SnowballTarget.onnx:   0% 0.00/651k [00:00<?, ?B/s]\u001b[A\u001b[A\n","\n","\n","SnowballTarget.onnx:   0% 0.00/651k [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","Upload 13 LFS files:   0% 0/13 [00:00<?, ?it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","\n","SnowballTarget-199984.pt:   0% 0.00/3.85M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","SnowballTarget-149984.pt:   0% 16.4k/3.85M [00:00<03:00, 21.2kB/s]\u001b[A\n","\n","SnowballTarget.onnx:   3% 16.4k/651k [00:00<00:30, 20.9kB/s]\u001b[A\u001b[A\n","\n","\n","\n","\n","SnowballTarget-199984.pt:   0% 16.4k/3.85M [00:00<03:03, 20.9kB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","SnowballTarget-149984.onnx:   3% 16.4k/651k [00:00<00:31, 20.3kB/s]\n","SnowballTarget-149984.pt:   5% 197k/3.85M [00:00<00:12, 288kB/s]  \u001b[A\n","\n","SnowballTarget.onnx:  25% 164k/651k [00:00<00:02, 236kB/s]  \u001b[A\u001b[A\n","\n","\n","\n","\n","SnowballTarget-199984.pt:   5% 197k/3.85M [00:00<00:12, 285kB/s]  \u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","SnowballTarget-149984.onnx:  25% 164k/651k [00:00<00:02, 231kB/s]  \n","SnowballTarget-149984.pt:  14% 557k/3.85M [00:01<00:03, 838kB/s]\u001b[A\n","\n","SnowballTarget.onnx:  91% 590k/651k [00:01<00:00, 901kB/s]\u001b[A\u001b[A\n","\n","\n","\n","\n","SnowballTarget-199984.pt:  14% 524k/3.85M [00:01<00:04, 778kB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","SnowballTarget-149984.onnx:  78% 508k/651k [00:01<00:00, 752kB/s]\n","SnowballTarget-149984.pt:  60% 2.29M/3.85M [00:01<00:00, 3.87MB/s]\u001b[A\n","\n","\n","\n","\n","SnowballTarget-149984.onnx: 100% 651k/651k [00:01<00:00, 499kB/s]\n","SnowballTarget.onnx: 100% 651k/651k 
[00:01<00:00, 484kB/s]\n","SnowballTarget.onnx: 100% 651k/651k [00:01<00:00, 481kB/s]\n","SnowballTarget-149984.pt: 100% 3.85M/3.85M [00:01<00:00, 2.73MB/s]\n","SnowballTarget-199984.pt: 100% 3.85M/3.85M [00:01<00:00, 2.59MB/s]\n","SnowballTarget-200112.pt:   0% 0.00/3.85M [00:00<?, ?B/s]\n","SnowballTarget-49936.onnx:   0% 0.00/651k [00:00<?, ?B/s]\u001b[A\n","\n","SnowballTarget-49936.pt:   0% 0.00/3.85M [00:00<?, ?B/s]\u001b[A\u001b[A\n","\n","\n","SnowballTarget-200112.pt:  54% 2.06M/3.85M [00:00<00:00, 16.0MB/s]\n","\n","\n","\n","Upload 13 LFS files:   8% 1/13 [00:01<00:23,  1.95s/it]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","\n","SnowballTarget-99960.onnx: 100% 651k/651k [00:00<00:00, 3.79MB/s]\n","\n","\n","\n","\n","\n","SnowballTarget-49936.onnx: 100% 651k/651k [00:00<00:00, 2.10MB/s]\n","\n","SnowballTarget-49936.pt: 100% 3.85M/3.85M [00:00<00:00, 10.4MB/s]\n","SnowballTarget-200112.pt: 100% 3.85M/3.85M [00:00<00:00, 8.81MB/s]\n","events.out.tfevents.1697206622.5975dae249c9.1275.0:   0% 0.00/28.4k [00:00<?, ?B/s]\n","\n","\n","\n","\n","SnowballTarget-99960.pt:  67% 2.59M/3.85M [00:00<00:00, 8.59MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","Upload 13 LFS files:  46% 6/13 [00:02<00:02,  3.22it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","events.out.tfevents.1697206622.5975dae249c9.1275.0: 100% 28.4k/28.4k [00:00<00:00, 218kB/s]\n","checkpoint.pt: 100% 3.85M/3.85M [00:00<00:00, 10.8MB/s]\n","SnowballTarget-99960.pt: 100% 3.85M/3.85M [00:00<00:00, 6.09MB/s]\n","SnowballTarget.onnx: 100% 651k/651k [00:00<00:00, 2.60MB/s]\n","\n","\n","\n","\n","Upload 13 LFS files: 100% 13/13 [00:02<00:00,  4.83it/s]\n","[INFO] Your model is pushed to the hub. 
You can view your model here: https://huggingface.co/jake-walker/ppo-SnowballTarget\n"]}],"source":["!mlagents-push-to-hf  --run-id=\"SnowballTarget1\" --local-dir=\"./results/SnowballTarget1\" --repo-id=\"jake-walker/ppo-SnowballTarget\" --commit-message=\"Initial commit\""]},{"cell_type":"markdown","source":["If everything worked, you should see this at the end of the process (but with a different URL 😆):\n","\n","\n","\n","```\n","Your model is pushed to the hub. You can view your model here: https://huggingface.co/ThomasSimonini/ppo-SnowballTarget\n","```\n","\n","This is the link to your model. It contains a model card that explains how to use it, your Tensorboard and your config file. **What’s awesome is that it’s a git repository, which means you can have different commits, update your repository with a new push, etc.**"],"metadata":{"id":"yborB0850FTM"}},{"cell_type":"markdown","source":["But now comes the best part: **being able to visualize your agent online 👀.**"],"metadata":{"id":"5Uaon2cg0NrL"}},{"cell_type":"markdown","source":["### Watch your agent playing 👀\n","\n","For this step it’s simple:\n","\n","1. Remember your repo-id\n","\n","2. Go here: https://huggingface.co/spaces/ThomasSimonini/ML-Agents-SnowballTarget\n","\n","3. Launch the game and put it in full screen by clicking on the bottom right button\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/snowballtarget_load.png\" alt=\"Snowballtarget load\"/>"],"metadata":{"id":"VMc4oOsE0QiZ"}},{"cell_type":"markdown","source":["1. In step 1, choose your model repository which is the model id (in my case ThomasSimonini/ppo-SnowballTarget).\n","\n","2. 
In step 2, **choose what model you want to replay**:\n","  - I have multiple ones, since we saved a model every 50000 timesteps.\n","  - But if I want the most recent one, I choose `SnowballTarget.onnx`\n","\n","👉 What’s nice **is to try different model checkpoints to see the improvement of the agent.**\n","\n","And don't hesitate to share the best score your agent gets on Discord in the #rl-i-made-this channel 🔥\n","\n","Let's now try a harder environment called Pyramids..."],"metadata":{"id":"Djs8c5rR0Z8a"}},{"cell_type":"markdown","source":["## Pyramids 🏆\n","\n","### Download and move the environment zip file in `./training-envs-executables/linux/`\n","- Our environment executable is in a zip file.\n","- We need to download it and place it in `./training-envs-executables/linux/`\n","- We use a Linux executable because we use Colab, and Colab machines run Ubuntu (Linux)"],"metadata":{"id":"rVMwRi4y_tmx"}},{"cell_type":"markdown","metadata":{"id":"NyqYYkLyAVMK"},"source":["Download the file Pyramids.zip from https://drive.google.com/uc?export=download&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H using `wget`. Check out the full solution to download large files from GDrive [here](https://bcrf.biochem.wisc.edu/2021/02/05/download-google-drive-files-using-wget/)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"AxojCsSVAVMP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207442454,"user_tz":-60,"elapsed":2411,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"e5398bab-dbcf-482f-94ae-12809eae5da8"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2023-10-13 14:30:40--  https://docs.google.com/uc?export=download&confirm=t&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H\n","Resolving docs.google.com (docs.google.com)... 74.125.197.138, 74.125.197.102, 74.125.197.139, ...\n","Connecting to docs.google.com (docs.google.com)|74.125.197.138|:443... 
connected.\n","HTTP request sent, awaiting response... 303 See Other\n","Location: https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9basvv5fve5ps442dm9pekn83q111ukb/1697207400000/09764732090272539193/*/1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H?e=download&uuid=9a5b3206-d62a-4673-b836-ff7744a46256 [following]\n","Warning: wildcards not supported in HTTP.\n","--2023-10-13 14:30:40--  https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9basvv5fve5ps442dm9pekn83q111ukb/1697207400000/09764732090272539193/*/1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H?e=download&uuid=9a5b3206-d62a-4673-b836-ff7744a46256\n","Resolving doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)... 172.253.117.132, 2607:f8b0:400e:c0a::84\n","Connecting to doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)|172.253.117.132|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 42907187 (41M) [application/zip]\n","Saving to: ‘./training-envs-executables/linux/Pyramids.zip’\n","\n","./training-envs-exe 100%[===================>]  40.92M  40.4MB/s    in 1.0s    \n","\n","2023-10-13 14:30:42 (40.4 MB/s) - ‘./training-envs-executables/linux/Pyramids.zip’ saved [42907187/42907187]\n","\n"]}],"source":["!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H\" -O ./training-envs-executables/linux/Pyramids.zip && rm -rf /tmp/cookies.txt"]},{"cell_type":"markdown","metadata":{"id":"bfs6CTJ1AVMP"},"source":["**OR** Download directly to local machine and then drag and drop the file from local machine to 
`./training-envs-executables/linux`"]},{"cell_type":"markdown","metadata":{"id":"H7JmgOwcSSmF"},"source":["Wait for the upload to finish and then run the command below.\n","\n","![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAASYAAAAfCAYAAABKxmALAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAmZSURBVHhe7d0NTNTnHQfwL+rxcigHFHUH9LCCVuaKCWgB4029Gq7NcBuLPdPKEhh6iRhEmauLY2V0ptEUSZGwTJTA5jSBOnGDmpyxEMUqxEH0rOIQfDmtt/oCnsof9BT2PPAcx8nbeTQi9vcxF56X+///h8n98vs9/xfcYmJiukEIIS+RCeInIYS8NChjIoS4LCwsDMHBwfDx8cEc76u4dNcT5y63orm5WbzDNRSYCCHPhQejuLg4xMbGwtvbW4wCymNLRAuQFAtwpW0Sio+34+uzN8So8ygwEUKc4uXlheTkZCxbtkyMOOofmGy63P1R37EAm4u+QUdHhxgdGa0xEUJGxLOknJycQYPSvXv3cPnyZTx0n4Fut4litNeEx61YMNGAQxun9uzDWZQxEUKGxQNKVlaWQ9nGg9Hhw4dx8uRJ3Lx5U4wCQYouvD3bB+qfTEWsT70Ytfv1l287tf5EgYkQMiRevvFMSalUihHAYDCgpKQEjx49EiODWzwL+MN73Zji1iZGgO/8l+ODbQ0jlnXjtJQLheZ9HTQzRXcAHbKK8pEeK7rfi5GOScirh68p9Q9K+/fvx65du0YMStyxS0DcTjecuh8lRoDprRXY8pvFoje0IQJTNNLzilBU5PjK35qO+Dly8Z6xFAF1nBZaTbjovwhjcUxCxg4v4fqvKfFM6cCBA6LnvIziK7jdFSh6wBKfE4iaGyJ6gxs2Y5LOFiIlJYW90pC5rQT1CEfCulRoxzw2lSN7TQo272kU/RdhLI5JyNjhlwTY8DUlXr65avMBq2gBkyQTUjUeoje4IdaYeMakR1hLIdJ21okxJigJ2z+JgeVQBpoj86GVVSEzcx/M/ecqMvCVajv0Ac1okEcg0s8EQ0o2KiNX4XeJaqgUMqDLCkuLAQXbytHCt41NR/7qqWhmhwpboISchUvJXIeyf9yGWh+PUAV7T5cFLV8W4NNDfAteqmmBIynILuU7CEXC79chfhZ7Y5cE87lrkM0Lwc09acg7xablaui36BCt7I2okrkB5TsKUGUvfXutzEJRnEp07Ew9x3E8ZvT6/J7fsW5CWO9+2XFNJ4rx2d8aIIntCBnP9u7d27fgzUs4V7Kl/nKT3+hbEO+Y8hY0W7/taQ/GxTUmCZUXTYAyDGqRPckXhSDAakLjEfG1DAqHsqkMBTuKYUA8Nug1UJjKkJmWgrQdNbDMiEdyor12ZTtDiKIWxR9nILOYfbmV0Uj6rRqPq/OQsSkbZedZ+Hl3JdvTQBF6PeJnSKgpzkTGx8Wo9QhEgJjj4tevQrS8Gfsy05DGAmmzPBK61AQMSPxKs0WGyF+ZMFxlUd5iRFWFmH9WUAh86oqRuSkTJSctUP40GalLxRwh4xgv4/qfheNn30ar5pvbogV4Si3DXj7gfGDyi0D8h1Es+JjR3MBCU4URLVYVwpfzr7cc2jAlrNcbYbClC7dqUVBoQMMFEyzyepTu/BSfFVbBzOali/vQyNIs5Rtq8WbOjPqCSjSYLTCzzMPIg6m5Hn9hx7G0sayr7hokmRKhAxa0o7F0bgAsZ8tQcsIMC8uGKgvqe7M4Qe4hg9V8EbXs4JK5CmUHK1Fz8Q54IjYU5ft6aFiwMx7cjZqhUqBv+edrgLnNzIJiGc61yRE2n2VVhIxz/DYTG17G9b8kwFXHL9w
XLVaqPZUQPWuy6A00bGCSz9PbF79z0pGgsqCh9HOU8aAhGdB43cqSJi0LSxqEv85KnvMGexnTyUoq0WTRAHemxEC/9a99+9Pyisnh6FZY+zaWYH3Kfjy1OlEWhcB3shV3bhhFn5HYvkSTq2lohHW2Dvm5W7GFZU8Rlirs+2eNQ/ByEKSD/h0VpLPl2H1imE/g8PmMMN6QIJMPF+4IGR/4vW82ra2tojU6d9snoHPSNNFjFZB/p2gNNGxgsi9+i1daJgqqLbZZGM6zcu71cGji5kCFfmXcs9gXfUMyL+XKkb2pd18GtumLYq7IQdpHeSg7ZcLjaVHQbdiO7aujxeyzQqFbrYFKMqJ8T40TgdFOLpOJFiFkMN39Qk433ERrIOdLuUFIh4wsHKmgficE6F/GPUulRIDMhNrPDTD1LDjLIXO8cn0UruHeQxkCgiNEn5HLYA8REdB+uAraYCMMXxQiJzMDef+REPBWDCsCBwpdmQiNSkLDF3lDl3CDUkLpz0pGyRa4CRm/7t+3l13+/v6iNTp+8i54Pfmf6AHNrZ6iNdCoAhNQCeN1ICBADnNzvzLuWZYONqdE1Go1VDNYIEveAnWQmBu1OlSfvwPFPB2SFimhUEZCtzGGHc0uZJ4GCSuTEO3HOn4RiORn0To7wP/r5ZEJSErUsDyJmalD4lJWwjWUooCfzRuJKgablkdAwf5FJOoRM01C4wmDmCRk/Lpxw/5EAF9fXwQG2q9DctWicPsyR/ckb5y+9ED0BhplYGKhiZdzvIyrGCa9uFCIwgoTZNFJyPpjFnSht3GOn/HylDsEEFcZCwtReVXOAt5W5H6SjIgH13BHzPG1n8JdZWhEFPQ5vWtlUR7NqNxVyMZY0IqMgXqxmuVVzIJwqFiqpYjst7bGXvnrhyj7bt2ENXoNcotykb44AOavirHbmYBGyEuO38/W3t4uesDChQtFy3VLIuznyjvks4e9Z27U98r1XM/jV4O07LLnWo8Z72y/dwr7vQl5FaWmpvZd+c3PzK1du9apW1EGMz/EDfm/vCt6LFfpikFKfpPoDeRyxiRXhiJ8/iq8+2MZWs5W/qCCEiE/BEeOHBGt3nIuKSlJ9J7fn39hX+jukilQMsICrsuBac6v1mHTWg0CrhpQeojCEiGvGl5qHT16VPQArVaLFStWiJ7z8lOC4etmX1w50zkfNWeGf6olPfaEEDKk0Tz2JCRgEnI/8EbghCtiBHjo9SZ+nvdwxMeeTAwODv6TaBNCiIMnT56gqampZ/Hb3d29Z8z21AGZTNaz9vTggf3s2mSPbqjn+kH/Xgg2xH4HRbf98oAnMn+sORiIW7duiZGhUcZECBkRD0YbN250yJxseHDiV4er7v8LP5K+hsxqL9tszK+twEd/Nzn911MoYyKEjIgHnurqaigUCsyc6fi0RE9PT/j5+UHx6L/wanW82bfL/TUcf/wzrN95yqlMyYYCEyHEKbysO336NOrr6+Hm5obp06f3lXech+UM3O+dwVOPqZB8ImFsfxMFx4A9/zb2bPs8qJQjhLiMl3i2P3ip9j2DpjZv1F7qxLmL9gVvV1BgIoS8dEZ9SwohhHzfKDARQl4ywP8B/eN9dc0U7ocAAAAASUVORK5CYII=)"]},{"cell_type":"markdown","source":["Unzip it"],"metadata":{"id":"iWUUcs0_794U"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"i2E3K4V2AVMP"},"outputs":[],"source":["%%capture\n","!unzip -d ./training-envs-executables/linux/ ./training-envs-executables/linux/Pyramids.zip"]},{"cell_type":"markdown","metadata":{"id":"KmKYBgHTAVMP"},"source":["Make 
sure your file is accessible"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Im-nwvLPAVMP"},"outputs":[],"source":["!chmod -R 755 ./training-envs-executables/linux/Pyramids/Pyramids"]},{"cell_type":"markdown","source":["###  Modify the PyramidsRND config file\n","- Contrary to the first environment which was a custom one, **Pyramids was made by the Unity team**.\n","- So the PyramidsRND config file already exists and is in ./content/ml-agents/config/ppo/PyramidsRND.yaml\n","- You might ask why \"RND\" in PyramidsRND. RND stands for *random network distillation*; it's a way to generate curiosity rewards. If you want to know more about that, we wrote an article explaining this technique: https://medium.com/data-from-the-trenches/curiosity-driven-learning-through-random-network-distillation-488ffd8e5938\n","\n","For this training, we’ll modify one thing:\n","- The total training steps hyperparameter is too high since we can hit the benchmark (mean reward = 1.75) in only 1M training steps.\n","👉 To do that, we go to config/ppo/PyramidsRND.yaml **and set `max_steps` to 1000000.**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit7/pyramids-config.png\" alt=\"Pyramids config\"/>"],"metadata":{"id":"fqceIATXAgih"}},{"cell_type":"markdown","source":["As an experiment, you should also try modifying some other hyperparameters. Unity provides a very [good documentation explaining each of them here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md).\n","\n","We’re now ready to train our agent 🔥."],"metadata":{"id":"RI-5aPL7BWVk"}},{"cell_type":"markdown","source":["### Train the agent\n","\n","The training will take 30 to 45min depending on your machine, go take a ☕️, you deserve it 
🤗."],"metadata":{"id":"s5hr1rvIBdZH"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"fXi4-IaHBhqD","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697209575529,"user_tz":-60,"elapsed":2045244,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"882d5b7f-fbb0-4250-8a0c-f561eebb60f0"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-13 14:32:12.375036: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","\n","            ┐  ╖\n","        ╓╖╬│╡  ││╬╖╖\n","    ╓╖╬│││││┘  ╬│││││╬╖\n"," ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗\n"," ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣\n"," ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣\n"," ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣\n"," ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜\n"," ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣\n"," ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣\n","   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣\n","      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜\n","          ╙╬╬╬╣╣╣╜\n","             ╙\n","        \n"," Version information:\n","  ml-agents: 1.1.0.dev0,\n","  ml-agents-envs: 1.1.0.dev0,\n","  Communicator API: 1.5.0,\n","  PyTorch: 2.0.1+cu118\n","[INFO] Connected to Unity environment with package version 2.2.1-exp.1 and communication version 1.5.0\n","[INFO] Connected new brain: Pyramids?team=0\n","[INFO] Hyperparameters for behavior name Pyramids: \n","\ttrainer_type:\tppo\n","\thyperparameters:\t\n","\t  batch_size:\t128\n","\t  
buffer_size:\t2048\n","\t  learning_rate:\t0.0003\n","\t  beta:\t0.01\n","\t  epsilon:\t0.2\n","\t  lambd:\t0.95\n","\t  num_epoch:\t3\n","\t  shared_critic:\tFalse\n","\t  learning_rate_schedule:\tlinear\n","\t  beta_schedule:\tlinear\n","\t  epsilon_schedule:\tlinear\n","\tcheckpoint_interval:\t500000\n","\tnetwork_settings:\t\n","\t  normalize:\tFalse\n","\t  hidden_units:\t512\n","\t  num_layers:\t2\n","\t  vis_encode_type:\tsimple\n","\t  memory:\tNone\n","\t  goal_conditioning_type:\thyper\n","\t  deterministic:\tFalse\n","\treward_signals:\t\n","\t  extrinsic:\t\n","\t    gamma:\t0.99\n","\t    strength:\t1.0\n","\t    network_settings:\t\n","\t      normalize:\tFalse\n","\t      hidden_units:\t128\n","\t      num_layers:\t2\n","\t      vis_encode_type:\tsimple\n","\t      memory:\tNone\n","\t      goal_conditioning_type:\thyper\n","\t      deterministic:\tFalse\n","\t  rnd:\t\n","\t    gamma:\t0.99\n","\t    strength:\t0.01\n","\t    network_settings:\t\n","\t      normalize:\tFalse\n","\t      hidden_units:\t64\n","\t      num_layers:\t3\n","\t      vis_encode_type:\tsimple\n","\t      memory:\tNone\n","\t      goal_conditioning_type:\thyper\n","\t      deterministic:\tFalse\n","\t    learning_rate:\t0.0001\n","\t    encoding_size:\tNone\n","\tinit_path:\tNone\n","\tkeep_checkpoints:\t5\n","\teven_checkpoints:\tFalse\n","\tmax_steps:\t1000000\n","\ttime_horizon:\t128\n","\tsummary_freq:\t30000\n","\tthreaded:\tFalse\n","\tself_play:\tNone\n","\tbehavioral_cloning:\tNone\n","[INFO] Pyramids. Step: 30000. Time Elapsed: 57.790 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 60000. Time Elapsed: 112.239 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 90000. Time Elapsed: 166.638 s. Mean Reward: -0.867. Std of Reward: 0.516. Training.\n","[INFO] Pyramids. Step: 120000. Time Elapsed: 224.838 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 150000. 
Time Elapsed: 285.002 s. Mean Reward: -0.925. Std of Reward: 0.416. Training.\n","[INFO] Pyramids. Step: 180000. Time Elapsed: 341.805 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 210000. Time Elapsed: 401.424 s. Mean Reward: -0.374. Std of Reward: 1.111. Training.\n","[INFO] Pyramids. Step: 240000. Time Elapsed: 459.935 s. Mean Reward: -0.163. Std of Reward: 1.142. Training.\n","[INFO] Pyramids. Step: 270000. Time Elapsed: 519.300 s. Mean Reward: -0.900. Std of Reward: 0.500. Training.\n","[INFO] Pyramids. Step: 300000. Time Elapsed: 581.630 s. Mean Reward: -0.639. Std of Reward: 0.874. Training.\n","[INFO] Pyramids. Step: 330000. Time Elapsed: 645.060 s. Mean Reward: 0.113. Std of Reward: 1.216. Training.\n","[INFO] Pyramids. Step: 360000. Time Elapsed: 706.423 s. Mean Reward: -0.089. Std of Reward: 1.167. Training.\n","[INFO] Pyramids. Step: 390000. Time Elapsed: 769.370 s. Mean Reward: 0.226. Std of Reward: 1.213. Training.\n","[INFO] Pyramids. Step: 420000. Time Elapsed: 827.123 s. Mean Reward: 0.019. Std of Reward: 1.220. Training.\n","[INFO] Pyramids. Step: 450000. Time Elapsed: 887.729 s. Mean Reward: 0.595. Std of Reward: 1.225. Training.\n","[INFO] Pyramids. Step: 480000. Time Elapsed: 951.806 s. Mean Reward: 0.828. Std of Reward: 1.137. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-499967.onnx\n","[INFO] Pyramids. Step: 510000. Time Elapsed: 1016.865 s. Mean Reward: 1.057. Std of Reward: 1.036. Training.\n","[INFO] Pyramids. Step: 540000. Time Elapsed: 1078.715 s. Mean Reward: 0.713. Std of Reward: 1.172. Training.\n","[INFO] Pyramids. Step: 570000. Time Elapsed: 1139.891 s. Mean Reward: 0.968. Std of Reward: 1.098. Training.\n","[INFO] Pyramids. Step: 600000. 
Time Elapsed: 1202.891 s. Mean Reward: 0.862. Std of Reward: 1.172. Training.\n","[INFO] Pyramids. Step: 630000. Time Elapsed: 1263.803 s. Mean Reward: 0.896. Std of Reward: 1.127. Training.\n","[INFO] Pyramids. Step: 660000. Time Elapsed: 1324.749 s. Mean Reward: 0.915. Std of Reward: 1.079. Training.\n","[INFO] Pyramids. Step: 690000. Time Elapsed: 1387.112 s. Mean Reward: 1.132. Std of Reward: 0.993. Training.\n","[INFO] Pyramids. Step: 720000. Time Elapsed: 1449.411 s. Mean Reward: 0.963. Std of Reward: 1.082. Training.\n","[INFO] Pyramids. Step: 750000. Time Elapsed: 1512.370 s. Mean Reward: 1.410. Std of Reward: 0.718. Training.\n","[INFO] Pyramids. Step: 780000. Time Elapsed: 1576.158 s. Mean Reward: 1.171. Std of Reward: 0.886. Training.\n","[INFO] Pyramids. Step: 810000. Time Elapsed: 1636.217 s. Mean Reward: 1.238. Std of Reward: 0.865. Training.\n","[INFO] Pyramids. Step: 840000. Time Elapsed: 1700.688 s. Mean Reward: 1.406. Std of Reward: 0.758. Training.\n","[INFO] Pyramids. Step: 870000. Time Elapsed: 1762.539 s. Mean Reward: 1.073. Std of Reward: 1.067. Training.\n","[INFO] Pyramids. Step: 900000. Time Elapsed: 1825.113 s. Mean Reward: 1.366. Std of Reward: 0.745. Training.\n","[INFO] Pyramids. Step: 930000. Time Elapsed: 1892.548 s. Mean Reward: 1.382. Std of Reward: 0.795. Training.\n","[INFO] Pyramids. Step: 960000. Time Elapsed: 1954.001 s. Mean Reward: 1.404. Std of Reward: 0.773. Training.\n","[INFO] Pyramids. Step: 990000. Time Elapsed: 2020.626 s. Mean Reward: 1.405. Std of Reward: 0.700. 
Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-999876.onnx\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-1000004.onnx\n","[INFO] Copied results/Pyramids Training/Pyramids/Pyramids-1000004.onnx to results/Pyramids Training/Pyramids.onnx.\n"]}],"source":["!mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=\"Pyramids Training\" --no-graphics"]},{"cell_type":"markdown","metadata":{"id":"txonKxuSByut"},"source":["### Push the agent to the 🤗 Hub\n","\n","- Now that we trained our agent, we’re **ready to push it to the Hub to be able to visualize it playing on your browser🔥.**"]},{"cell_type":"code","source":["!mlagents-push-to-hf --run-id=\"Pyramids\" --local-dir=\"./results/Pyramids Training\" --repo-id=\"jake-walker/ppo-Pyramids\" --commit-message=\"Initial commit\""],"metadata":{"id":"yiEQbv7rB4mU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697209891309,"user_tz":-60,"elapsed":4221,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"2d31ba07-54ee-4a6c-89bc-d6324e9f5bdb"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[INFO] This function will create a model card and upload your Pyramids into HuggingFace Hub. 
This is a work in progress: If you encounter a bug, please send open an issue\n","[INFO] Pushing repo Pyramids to the Hugging Face Hub\n","Pyramids.onnx:   0% 0.00/1.42M [00:00<?, ?B/s]\n","Pyramids-1000004.pt:   0% 0.00/8.66M [00:00<?, ?B/s]\u001b[A\n","\n","Pyramids-499967.onnx:   0% 0.00/1.42M [00:00<?, ?B/s]\u001b[A\u001b[A\n","\n","\n","Pyramids-499967.pt:   0% 0.00/8.66M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","Upload 9 LFS files:   0% 0/9 [00:00<?, ?it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","\n","Pyramids.onnx:   0% 0.00/1.42M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","\n","Pyramids.onnx:   1% 16.4k/1.42M [00:00<00:52, 26.8kB/s]\n","Pyramids-1000004.pt:   0% 16.4k/8.66M [00:00<05:22, 26.8kB/s]\u001b[A\n","\n","\n","Pyramids-499967.pt:   0% 16.4k/8.66M [00:00<05:24, 26.7kB/s]\u001b[A\u001b[A\u001b[A\n","\n","Pyramids-499967.onnx:   1% 16.4k/1.42M [00:00<00:52, 26.5kB/s]\u001b[A\u001b[A\n","\n","\n","\n","\n","Pyramids.onnx:  14% 197k/1.42M [00:00<00:03, 346kB/s]  \n","Pyramids-1000004.pt:   2% 164k/8.66M [00:00<00:29, 288kB/s]  \u001b[A\n","\n","\n","Pyramids-499967.pt:   2% 164k/8.66M [00:00<00:29, 287kB/s]  \u001b[A\u001b[A\u001b[A\n","\n","Pyramids-499967.onnx:  12% 164k/1.42M [00:00<00:04, 285kB/s]  \u001b[A\u001b[A\n","\n","\n","\n","\n","Pyramids.onnx:  39% 557k/1.42M [00:00<00:00, 1.03MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n","Pyramids.onnx:  40% 573k/1.42M [00:00<00:00, 997kB/s]\n","\n","\n","Pyramids-499967.pt:   5% 475k/8.66M [00:00<00:09, 826kB/s]\u001b[A\u001b[A\u001b[A\n","\n","Pyramids-499967.onnx:  36% 508k/1.42M [00:00<00:01, 886kB/s]\u001b[A\u001b[A\n","Pyramids-1000004.pt:  32% 2.77M/8.66M [00:00<00:01, 5.40MB/s]\u001b[A\n","\n","\n","Pyramids-499967.pt:  29% 2.49M/8.66M [00:00<00:01, 4.79MB/s]\u001b[A\u001b[A\u001b[A\n","Pyramids-1000004.pt:  67% 5.77M/8.66M [00:01<00:00, 10.2MB/s]\u001b[A\n","\n","\n","Pyramids.onnx: 100% 1.42M/1.42M [00:01<00:00, 
1.23MB/s]\n","Pyramids.onnx: 100% 1.42M/1.42M [00:01<00:00, 1.15MB/s]\n","Pyramids-499967.onnx: 100% 1.42M/1.42M [00:01<00:00, 1.15MB/s]\n","Pyramids-499967.pt: 100% 8.66M/8.66M [00:01<00:00, 5.82MB/s]\n","\n","\n","\n","\n","Upload 9 LFS files:  11% 1/9 [00:01<00:11,  1.50s/it]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","Pyramids-1000004.pt: 100% 8.66M/8.66M [00:01<00:00, 5.72MB/s]\n","Pyramids-999876.pt:  55% 4.80M/8.66M [00:00<00:00, 34.9MB/s]\n","events.out.tfevents.1697207533.5975dae249c9.5168.0:   0% 0.00/305k [00:00<?, ?B/s]\u001b[A\n","\n","events.out.tfevents.1697207533.5975dae249c9.5168.0: 100% 305k/305k [00:00<00:00, 2.78MB/s]\n","\n","\n","checkpoint.pt:  95% 8.22M/8.66M [00:00<00:00, 31.4MB/s]\u001b[A\u001b[A\n","Pyramids.onnx:   0% 0.00/1.42M [00:00<?, ?B/s]\u001b[A\n","\n","\n","\n","Pyramids.onnx: 100% 1.42M/1.42M [00:00<00:00, 6.72MB/s]\n","Pyramids-999876.pt: 100% 8.66M/8.66M [00:01<00:00, 6.81MB/s]\n","checkpoint.pt: 100% 8.66M/8.66M [00:01<00:00, 6.80MB/s]\n","\n","\n","\n","\n","Upload 9 LFS files:  67% 6/9 [00:02<00:01,  2.70it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n","\n","\n","\n","Upload 9 LFS files: 100% 9/9 [00:02<00:00,  3.06it/s]\n","[INFO] Your model is pushed to the hub. You can view your model here: https://huggingface.co/jake-walker/ppo-Pyramids\n"]}]},{"cell_type":"markdown","source":["### Watch your agent playing 👀\n","\n","👉 https://huggingface.co/spaces/unity/ML-Agents-Pyramids"],"metadata":{"id":"7aZfgxo-CDeQ"}},{"cell_type":"markdown","source":["### 🎁 Bonus: Why not train on another environment?\n","Now that you know how to train an agent using MLAgents, **why not try another environment?**\n","\n","MLAgents provides 17 different and we’re building some custom ones. 
The best way to learn is to try things on your own and have fun.\n","\n"],"metadata":{"id":"hGG_oq2n0wjB"}},{"cell_type":"markdown","source":["![cover](https://miro.medium.com/max/1400/0*xERdThTRRM2k_U9f.png)"],"metadata":{"id":"KSAkJxSr0z6-"}},{"cell_type":"markdown","source":["You have the full list of the Unity official environments here 👉 https://github.com/Unity-Technologies/ml-agents/blob/develop/docs/Learning-Environment-Examples.md\n","\n","For the demos to visualize your agent 👉 https://huggingface.co/unity\n","\n","For now we have integrated:\n","- [Worm](https://huggingface.co/spaces/unity/ML-Agents-Worm) demo where you teach a **worm to crawl**.\n","- [Walker](https://huggingface.co/spaces/unity/ML-Agents-Walker) demo where you teach an agent **to walk towards a goal**."],"metadata":{"id":"YiyF4FX-04JB"}},{"cell_type":"markdown","source":["That’s all for today. Congrats on finishing this tutorial!\n","\n","The best way to learn is to practice and try stuff. Why not try another environment? ML-Agents has 17 different environments, but you can also create your own. 
Check the documentation and have fun!\n","\n","See you on Unit 6 🔥,\n","\n","## Keep Learning, Stay  awesome 🤗"],"metadata":{"id":"PI6dPWmh064H"}}],"metadata":{"accelerator":"GPU","colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit5/unit5.ipynb","timestamp":1697206423307}],"gpuType":"T4"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"4428338482f948e6be752e8e9827c841":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_249ca35c9a7946d2b4f17f6c3eddc0ab","IPY_MODEL_c50f669c6c6d4033888d1e4cf549b7ca","IPY_MODEL_398634082d6c4fd49b0b518b6c20e3e3","IPY_MODEL_d3c41cdcff7d4056a45b1894f83bfcb4"],"layout":"IPY_MODEL_357cfe11cb4f458bb70372a6d1209262"}},"a37b769ce02840ed9b83bbde4f4daac7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ae25137c62ab4c6aa00ea707937c68bd","placeholder":"​","style":"IPY_MODEL_2196e1264505454098d83d5a986c122f","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. 
<br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"}},"197f3a0f7b2b4618aaa2c0abcc121768":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_ebda32c0c8d94222b1bd72cedab9a01c","placeholder":"​","style":"IPY_MODEL_b03c362004d24d65aa07a298d9cbf7d6","value":""}},"0de73d47456740c8a7aec3a2c23429e4":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_1d64929e8b8d4dc18a2175c82567e1c7","style":"IPY_MODEL_ac0b7664442f4c68bd72781ddb87ea9e","value":true}},"dbb882b2352b44cb8d0a7646b3ef0a96":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_47893006c1f54e96adaffd43521d4c16","style":"IPY_MODEL_83fd942f1b9e4df09cf684609dec4ee7","tooltip":""}},"48c1ef961a6942199d437ba1b8456f6f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_80c35c2596fe4098b77e9ab7b4afba34","placeholder":"​","style":"IPY_MODEL_3a38f167af43410b9dc9f279cc7b5a01","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"357cfe11cb4f458bb70372a6d1209262":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"ae25137c62ab4c6aa00ea707937c68bd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2196e1264505454098d83d5a986c122f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ebda32c0c8d94222b1bd72cedab9a01c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b03c362004d24d65aa07a298d9cbf7d6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1d64929e8b8d4dc18a2175c82567e1c7":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ac0b7664442f4c68bd72781ddb87ea9e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"47893006c1f54e96adaffd43521d4c16":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"83fd942f1b9e4df09cf684609dec4ee7":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"80c35c2596fe4098b77e9ab7b4afba34":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a38f167af43410b9dc9f279cc7b5a01":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d54984153efa4edc957d350eda7d8f93":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2f90a2040a854b83a2ec49155655c495","placeholder":"​","style":"IPY_MODEL_22da873132224b989e009434a3369a68","value":"Connecting..."}},"2f90a2040a854b83a2ec49155655c495":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"22da873132224b989e009434a3369a68":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"249ca35c9a7946d2b4f17f6c3eddc0ab":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f4979c13430b456ea912de1c06712416","placeholder":"​","style":"IPY_MODEL_95fda6ca627a40f6a0cea7450dff1ba4","value":"Token is valid (permission: write)."}},"c50f669c6c6d4033888d1e4cf549b7ca":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c2b4330ea8b9461ca1b137d100988f13","placeholder":"​","style":"IPY_MODEL_44ac7a9c241c47ec97260fbed13aa199","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"398634082d6c4fd49b0b518b6c20e3e3":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_243b25a12f2442049b65f25b6154e07c","placeholder":"​","style":"IPY_MODEL_bc379cef55ae4a70973b5ff21a867e5a","value":"Your token has been saved to /root/.cache/huggingface/token"}},"d3c41cdcff7d4056a45b1894f83bfcb4":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_280358528f3b481f9dac3af422f88aff","placeholder":"​","style":"IPY_MODEL_abf0998b051f4b5f9d9098fe3a7867be","value":"Login 
successful"}},"f4979c13430b456ea912de1c06712416":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"95fda6ca627a40f6a0cea7450dff1ba4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c2b4330ea8b9461ca1b137d100988f13":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"44ac7a9c241c47ec97260fbed13aa199":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"243b25a12f2442049b65f25b6154e07c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc379cef55ae4a70973b5ff21a867e5a":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"280358528f3b481f9dac3af422f88aff":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abf0998b051f4b5f9d9098fe3a7867be":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
diff --git a/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb b/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb
new file mode 100644
index 0000000..219d435
--- /dev/null
+++ b/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb	
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"markdown","metadata":{"id":"-PTReiOw-RAN"},"source":["# Unit 6: Advantage Actor Critic (A2C) using Robotics Simulations with Panda-Gym 🤖\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit8/thumbnail.png\"  alt=\"Thumbnail\"/>\n","\n","In this notebook, you'll learn to use A2C with [Panda-Gym](https://github.com/qgallouedec/panda-gym). You're going **to train a robotic arm** (Franka Emika Panda robot) to perform a task:\n","\n","- `Reach`: the robot must place its end-effector at a target position.\n","\n","After that, you'll be able **to train on other robotics tasks**.\n"]},{"cell_type":"markdown","metadata":{"id":"QInFitfWno1Q"},"source":["### 🎮 Environments:\n","\n","- [Panda-Gym](https://github.com/qgallouedec/panda-gym)\n","\n","### 📚 RL-Library:\n","\n","- [Stable-Baselines3](https://stable-baselines3.readthedocs.io/)"]},{"cell_type":"markdown","metadata":{"id":"2CcdX4g3oFlp"},"source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."]},{"cell_type":"markdown","metadata":{"id":"MoubJX20oKaQ"},"source":["## Objectives of this notebook 🏆\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Panda-Gym**, the environment library.\n","- Be able to **train robots using A2C**.\n","- Understand why **we need to normalize the input**.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score 🔥.\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"DoUNkTExoUED"},"source":["## This notebook is from the Deep Reinforcement Learning Course\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/deep-rl-course-illustration.jpg\" alt=\"Deep RL Course illustration\"/>\n","\n","In this free course, you 
will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more check 📚 the syllabus 👉 https://simoninithomas.github.io/deep-rl-course\n","\n","Don’t forget to **<a href=\"http://eepurl.com/ic5ZUD\">sign up to the course</a>** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"BTuQAUAPoa5E"},"source":["## Prerequisites 🏗️\n","Before diving into the notebook, you need to:\n","\n","🔲 📚 Study [Actor-Critic methods by reading Unit 6](https://huggingface.co/deep-rl-course/unit6/introduction) 🤗  "]},{"cell_type":"markdown","metadata":{"id":"iajHvVDWoo01"},"source":["# Let's train our first robots 🤖"]},{"cell_type":"markdown","metadata":{"id":"zbOENTE2os_D"},"source":["To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process),  you need to push your trained model to the Hub and get the following results:\n","\n","- `PandaReachDense-v3` get a result of >= -3.5.\n","\n","To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"]},{"cell_type":"markdown","metadata":{"id":"PU4FVzaoM6fC"},"source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**. 
To do that, go to `Runtime > Change Runtime type`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step1.jpg\" alt=\"GPU Step 1\">"]},{"cell_type":"markdown","metadata":{"id":"KV0NyFdQM9ZG"},"source":["- `Hardware Accelerator > GPU`\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/gpu-step2.jpg\" alt=\"GPU Step 2\">"]},{"cell_type":"markdown","metadata":{"id":"bTpYcVZVMzUI"},"source":["## Create a virtual display 🔽\n","\n","During the notebook, we'll need to generate a replay video. To do so in Colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen 🖥"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jV6wjQ7Be7p5"},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":520,"status":"ok","timestamp":1697802125983,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ww5PQH1gNLI4","outputId":"405df85f-0516-4518-8a0a-aecb1685ff3e"},"outputs":[{"data":{"text/plain":["<pyvirtualdisplay.display.Display at 0x7fdf581abe80>"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"]},{"cell_type":"markdown","metadata":{"id":"e1obkbdJ_KnG"},"source":["### Install dependencies 🔽\n","\n","The first step is to install the dependencies. We’ll install several:\n","- `gymnasium`\n","- `panda-gym`: Contains the robotic arm environments.\n","- 
`stable-baselines3`: The SB3 deep reinforcement learning library.\n","- `huggingface_sb3`: Additional code for Stable-baselines3 to load and upload models from the Hugging Face 🤗 Hub.\n","- `huggingface_hub`: Library allowing anyone to work with the Hub repositories.\n","\n","⏲ The installation can **take 10 minutes**."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":22726,"status":"ok","timestamp":1697802148707,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"TgZUkjKYSgvn","outputId":"0eb69833-0b13-4bf0-dd21-1792727fe05b"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting stable-baselines3[extra]\n","  Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m178.7/178.7 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])\n","  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (1.23.5)\n","Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.1.0+cu118)\n","Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.2.1)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (1.5.3)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (3.7.1)\n","Requirement already satisfied: 
opencv-python in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (4.8.0.76)\n","Requirement already satisfied: pygame in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.5.2)\n","Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.13.0)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (5.9.5)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (4.66.1)\n","Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (13.6.0)\n","Collecting shimmy[atari]~=1.1.0 (from stable-baselines3[extra])\n","  Downloading Shimmy-1.1.0-py3-none-any.whl (37 kB)\n","Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (9.4.0)\n","Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra])\n","  Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (8.1.7)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2.31.0)\n","Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra])\n","  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.7/434.7 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n","  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n","  Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]) (4.5.0)\n","Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra])\n","  Using cached Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n","Collecting ale-py~=0.8.1 (from shimmy[atari]~=1.1.0->stable-baselines3[extra])\n","  Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.4.0)\n","Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.59.0)\n","Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (2.17.3)\n","Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.0.0)\n","Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.5)\n","Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.20.3)\n","Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (67.7.2)\n","Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) 
(0.7.1)\n","Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.0.0)\n","Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (0.41.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.12.4)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.1.2)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (2023.6.0)\n","Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (2.1.0)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (1.1.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (4.43.1)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (1.4.5)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (23.2)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from 
matplotlib->stable-baselines3[extra]) (3.1.1)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->stable-baselines3[extra]) (2023.3.post1)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]) (2.16.1)\n","Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]~=1.1.0->stable-baselines3[extra]) (6.1.0)\n","Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (5.3.1)\n","Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (0.3.0)\n","Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (1.16.0)\n","Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (4.9)\n","Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]) (1.3.1)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]) (0.1.2)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from 
requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2023.7.22)\n","Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]) (2.1.3)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3[extra]) (1.3.0)\n","Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (0.5.0)\n","Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]) (3.2.2)\n","Building wheels for collected packages: AutoROM.accept-rom-license\n","  Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.6.1-py3-none-any.whl size=446660 sha256=84d117179bdc642a07fdd784f2ce51b95d2badf0767dc5c12174cd11df782bac\n","  Stored in directory: /root/.cache/pip/wheels/6b/1b/ef/a43ff1a2f1736d5711faa1ba4c1f61be1131b8899e6a057811\n","Successfully built AutoROM.accept-rom-license\n","Installing collected packages: farama-notifications, gymnasium, ale-py, shimmy, AutoROM.accept-rom-license, autorom, stable-baselines3\n","Successfully installed AutoROM.accept-rom-license-0.6.1 ale-py-0.8.1 autorom-0.6.1 farama-notifications-0.0.4 gymnasium-0.29.1 shimmy-1.1.0 stable-baselines3-2.1.0\n","Requirement already satisfied: gymnasium in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (0.0.4)\n"]}],"source":["!pip install stable-baselines3[extra]\n","!pip install gymnasium"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":209251,"status":"ok","timestamp":1697802565927,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ABneW6tOSpyU","outputId":"12ecb851-feb6-43f6-a946-97cdf70f4976"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting huggingface_sb3\n","  Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n","Collecting huggingface-hub~=0.8 (from huggingface_sb3)\n","  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n","\u001b[2K     
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (6.0.1)\n","Requirement already satisfied: wasabi in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (1.1.2)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (2.2.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.66.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (23.2)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) 
(2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (2023.7.22)\n","Installing collected packages: huggingface-hub, huggingface_sb3\n","Successfully installed huggingface-hub-0.18.0 huggingface_sb3-3.0\n","Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.18.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.66.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (6.0.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (23.2)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2023.7.22)\n","Collecting panda_gym\n","  Downloading panda_gym-3.0.7-py3-none-any.whl (23 kB)\n","Requirement already satisfied: gymnasium>=0.26 in 
/usr/local/lib/python3.10/dist-packages (from panda_gym) (0.29.1)\n","Collecting pybullet (from panda_gym)\n","  Downloading pybullet-3.2.5.tar.gz (80.5 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.5/80.5 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from panda_gym) (1.23.5)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from panda_gym) (1.11.3)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (0.0.4)\n","Building wheels for collected packages: pybullet\n","  Building wheel for pybullet (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for pybullet: filename=pybullet-3.2.5-cp310-cp310-linux_x86_64.whl size=99850132 sha256=98a6b68785984d0f2c1b5cb18bc101d6d13f771e8825e0ca98af00c38d343a57\n","  Stored in directory: /root/.cache/pip/wheels/6b/fa/1a/c315a5133f0c9bf202a6daa5d70891120e7fe403e06e3407cc\n","Successfully built pybullet\n","Installing collected packages: pybullet, panda_gym\n","Successfully installed panda_gym-3.0.7 pybullet-3.2.5\n"]}],"source":["!pip install huggingface_sb3\n","!pip install huggingface_hub\n","!pip install panda_gym"]},{"cell_type":"markdown","metadata":{"id":"QTep3PQQABLr"},"source":["## Import the packages 📦"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"HpiB8VdnQ7Bk"},"outputs":[],"source":["import os\n","\n","import gymnasium as gym\n","import panda_gym\n","\n","from huggingface_sb3 import load_from_hub, package_to_hub\n","\n","from stable_baselines3 import A2C\n","from stable_baselines3.common.evaluation import evaluate_policy\n","from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from huggingface_hub import notebook_login"]},{"cell_type":"markdown","metadata":{"id":"lfBwIS_oAVXI"},"source":["## PandaReachDense-v3 🦾\n","\n","The agent we're going to train is a robotic arm that needs to do controls (moving the arm and using the end-effector).\n","\n","In robotics, the *end-effector* is the device at the end of a robotic arm designed to interact with the environment.\n","\n","In `PandaReach`, the robot must place its end-effector at a target position (green ball).\n","\n","We're going to use the dense version of this environment. It means we'll get a *dense reward function* that **will provide a reward at each timestep** (the closer the agent is to completing the task, the higher the reward). 
This is in contrast to a *sparse reward function*, where the environment **returns a reward if and only if the task is completed**.\n","\n","Also, we're going to use *end-effector displacement control*, which means the **action corresponds to the displacement of the end-effector**. We don't control the individual motion of each joint (joint control).\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit8/robotics.jpg\"  alt=\"Robotics\"/>\n","\n","\n","This way **the training will be easier**.\n","\n"]},{"cell_type":"markdown","metadata":{"id":"frVXOrnlBerQ"},"source":["### Create the environment\n","\n","#### The environment 🎮\n","\n","In `PandaReachDense-v3` the robotic arm must place its end-effector at a target position (green ball)."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zXzAu3HYF1WD"},"outputs":[],"source":["env_id = \"PandaReachDense-v3\"\n","\n","# Create the env\n","env = gym.make(env_id)\n","\n","# Get the state space and action space\n","s_size = env.observation_space.shape\n","a_size = env.action_space"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":242,"status":"ok","timestamp":1697794230186,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"E-U9dexcF-FB","outputId":"df255efb-1f0a-4e6c-be83-ffb580ea5c9a"},"outputs":[{"name":"stdout","output_type":"stream","text":["_____OBSERVATION SPACE_____ \n","\n","The State Space is:  None\n","Sample observation OrderedDict([('achieved_goal', array([ 0.36307505, -0.69471014, -6.307691  ], dtype=float32)), ('desired_goal', array([ 1.7523677 , -0.59887403,  8.890521  ], dtype=float32)), ('observation', array([-4.9718113, -2.5216153,  0.5674409,  5.2952337, -3.1501577,\n","       -1.5565605], dtype=float32))])\n"]}],"source":["print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"The State Space is: \", 
s_size)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"]},{"cell_type":"markdown","metadata":{"id":"g_JClfElGFnF"},"source":["The observation space **is a dictionary with 3 different elements**:\n","- `achieved_goal`: (x,y,z) current position of the end-effector.\n","- `desired_goal`: (x,y,z) position of the target that the end-effector must reach.\n","- `observation`: position (x,y,z) and velocity (vx, vy, vz) of the end-effector.\n","\n","Because the observation is a dictionary, **we will need to use a MultiInputPolicy instead of MlpPolicy**."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":246,"status":"ok","timestamp":1697794239423,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ib1Kxy4AF-FC","outputId":"1f77638f-48ec-499b-8e29-f3e97b21fde1"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n"," _____ACTION SPACE_____ \n","\n","The Action Space is:  Box(-1.0, 1.0, (3,), float32)\n","Action Space Sample [-0.3927638   0.39380783  0.7919843 ]\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"The Action Space is: \", a_size)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"5MHTHEHZS4yp"},"source":["The action space is a vector with 3 values:\n","- Control x, y, z movement"]},{"cell_type":"markdown","metadata":{"id":"S5sXcg469ysB"},"source":["### Normalize observation and rewards"]},{"cell_type":"markdown","metadata":{"id":"1ZyX6qf3Zva9"},"source":["A good practice in reinforcement learning is to [normalize input features](https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html).\n","\n","For that purpose, there is a wrapper that will compute a running average and standard deviation of input features.\n","\n","We also normalize rewards with this same wrapper by adding 
`norm_reward = True`\n","\n","[You should check the documentation to fill this cell](https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecnormalize)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":633,"status":"ok","timestamp":1697794412785,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"1RsDtHHAQ9Ie","outputId":"483874a0-bb0e-492b-b0fb-daa1bb36f397"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n","  and should_run_async(code)\n"]}],"source":["env = make_vec_env(env_id, n_envs=4)\n","\n","# Adding this wrapper to normalize the observation and the reward\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)"]},{"cell_type":"markdown","metadata":{"id":"tF42HvI7-gs5"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2O67mqgC-hol"},"outputs":[],"source":["env = make_vec_env(env_id, n_envs=4)\n","\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)"]},{"cell_type":"markdown","metadata":{"id":"4JmEVU6z1ZA-"},"source":["### Create the A2C Model 🤖\n","\n","For more information about A2C implementation with StableBaselines3 check: https://stable-baselines3.readthedocs.io/en/master/modules/a2c.html#notes\n","\n","To find the best parameters I checked the [official trained agents by Stable-Baselines3 
team](https://huggingface.co/sb3)."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8196,"status":"ok","timestamp":1697794632312,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"vR3T4qFt164I","outputId":"220ade29-7ee6-4ac2-d809-e897ebea5f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["Using cuda device\n"]}],"source":["model = A2C(policy=\"MultiInputPolicy\", env=env, verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"nWAuOOLh-oQf"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FKFLY54T-pU1"},"outputs":[],"source":["model = A2C(policy = \"MultiInputPolicy\",\n","            env = env,\n","            verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"opyK3mpJ1-m9"},"source":["### Train the A2C agent 🏃\n","- Let's train our agent for 1,000,000 timesteps. Don't forget to use a GPU on Colab. Training will take approximately 25-40 minutes."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":293328,"status":"ok","timestamp":1697797343648,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"4TuGHZD7RF1G","outputId":"f9683cf1-cdb3-456b-9701-5393b764cafe"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n","  and should_run_async(code)\n"]},{"name":"stdout","output_type":"stream","text":["\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22300    |\n","|    time_elapsed       | 1203     |\n","|    total_timesteps    | 446000   |\n","| train/                |          |\n","|    entropy_loss       | -1.59    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22299    |\n","|    policy_loss        | 0.00934  |\n","|    std                | 0.423    |\n","|    value_loss         | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.9      |\n","|    ep_rew_mean        | -0.24    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22400    |\n","|    time_elapsed       | 1209     |\n","|    total_timesteps    | 448000   |\n","| train/                |          |\n","|    entropy_loss       | -1.61    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22399    |\n","|    policy_loss        | -0.0015  |\n","|    std                | 0.425    |\n","|    value_loss         | 0.000121 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22500    |\n","|    time_elapsed       | 1214     |\n","|    total_timesteps    | 450000  
 |\n","| train/                |          |\n","|    entropy_loss       | -1.61    |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22499    |\n","|    policy_loss        | -0.0133  |\n","|    std                | 0.425    |\n","|    value_loss         | 0.000201 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22600    |\n","|    time_elapsed       | 1219     |\n","|    total_timesteps    | 452000   |\n","| train/                |          |\n","|    entropy_loss       | -1.59    |\n","|    explained_variance | 0.792    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22599    |\n","|    policy_loss        | -0.02    |\n","|    std                | 0.422    |\n","|    value_loss         | 0.00106  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22700    |\n","|    time_elapsed       | 1225     |\n","|    total_timesteps    | 454000   |\n","| train/                |          |\n","|    entropy_loss       | -1.6     |\n","|    explained_variance | 0.928    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22699    |\n","|    policy_loss        | 0.0197   |\n","|    std                | 0.423    |\n","|    value_loss         | 0.000372 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        
| -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22800    |\n","|    time_elapsed       | 1231     |\n","|    total_timesteps    | 456000   |\n","| train/                |          |\n","|    entropy_loss       | -1.59    |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22799    |\n","|    policy_loss        | 0.00215  |\n","|    std                | 0.423    |\n","|    value_loss         | 0.000168 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 22900    |\n","|    time_elapsed       | 1237     |\n","|    total_timesteps    | 458000   |\n","| train/                |          |\n","|    entropy_loss       | -1.57    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22899    |\n","|    policy_loss        | 0.0323   |\n","|    std                | 0.419    |\n","|    value_loss         | 0.000441 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.84     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23000    |\n","|    time_elapsed       | 1241     |\n","|    total_timesteps    | 460000   |\n","| train/                |          |\n","|    entropy_loss       | -1.55    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22999    |\n","|    policy_loss        | -0.0127  |\n","|    std                | 0.417    |\n","|    
value_loss         | 0.000145 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23100    |\n","|    time_elapsed       | 1246     |\n","|    total_timesteps    | 462000   |\n","| train/                |          |\n","|    entropy_loss       | -1.53    |\n","|    explained_variance | 0.97     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23099    |\n","|    policy_loss        | -0.00299 |\n","|    std                | 0.416    |\n","|    value_loss         | 0.000131 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23200    |\n","|    time_elapsed       | 1252     |\n","|    total_timesteps    | 464000   |\n","| train/                |          |\n","|    entropy_loss       | -1.52    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23199    |\n","|    policy_loss        | -0.00295 |\n","|    std                | 0.415    |\n","|    value_loss         | 0.000226 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.96     |\n","|    ep_rew_mean        | -0.231   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23300    |\n","|    time_elapsed       | 1257     |\n","|    total_timesteps    | 466000   |\n","| train/                |          |\n","|    entropy_loss       | -1.51    
|\n","|    explained_variance | 0.738    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23299    |\n","|    policy_loss        | -0.0751  |\n","|    std                | 0.413    |\n","|    value_loss         | 0.00266  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23400    |\n","|    time_elapsed       | 1263     |\n","|    total_timesteps    | 468000   |\n","| train/                |          |\n","|    entropy_loss       | -1.51    |\n","|    explained_variance | 0.96     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23399    |\n","|    policy_loss        | -0.00202 |\n","|    std                | 0.414    |\n","|    value_loss         | 0.000141 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.74      |\n","|    ep_rew_mean        | -0.211    |\n","| time/                 |           |\n","|    fps                | 370       |\n","|    iterations         | 23500     |\n","|    time_elapsed       | 1267      |\n","|    total_timesteps    | 470000    |\n","| train/                |           |\n","|    entropy_loss       | -1.51     |\n","|    explained_variance | 0.985     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 23499     |\n","|    policy_loss        | -0.000896 |\n","|    std                | 0.414     |\n","|    value_loss         | 0.0001    |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.23    |\n","| time/                 |          |\n","|    
fps                | 370      |\n","|    iterations         | 23600    |\n","|    time_elapsed       | 1272     |\n","|    total_timesteps    | 472000   |\n","| train/                |          |\n","|    entropy_loss       | -1.49    |\n","|    explained_variance | 0.85     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23599    |\n","|    policy_loss        | 0.0253   |\n","|    std                | 0.412    |\n","|    value_loss         | 0.000381 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23700    |\n","|    time_elapsed       | 1278     |\n","|    total_timesteps    | 474000   |\n","| train/                |          |\n","|    entropy_loss       | -1.49    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23699    |\n","|    policy_loss        | 0.00574  |\n","|    std                | 0.412    |\n","|    value_loss         | 0.000193 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.93     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23800    |\n","|    time_elapsed       | 1283     |\n","|    total_timesteps    | 476000   |\n","| train/                |          |\n","|    entropy_loss       | -1.49    |\n","|    explained_variance | 0.946    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23799    |\n","|    policy_loss        | 0.0576   |\n","|    std                | 0.414    |\n","|    value_loss         | 0.000613 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 23900    |\n","|    time_elapsed       | 1289     |\n","|    total_timesteps    | 478000   |\n","| train/                |          |\n","|    entropy_loss       | -1.5     |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23899    |\n","|    policy_loss        | 0.0135   |\n","|    std                | 0.415    |\n","|    value_loss         | 0.000219 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 24000    |\n","|    time_elapsed       | 1294     |\n","|    total_timesteps    | 480000   |\n","| train/                |          |\n","|    entropy_loss       | -1.49    |\n","|    explained_variance | 0.903    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23999    |\n","|    policy_loss        | 0.0169   |\n","|    std                | 0.414    |\n","|    value_loss         | 0.000477 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24100    |\n","|    time_elapsed       | 1298     |\n","|    total_timesteps    | 482000   |\n","| train/                |          |\n","|    entropy_loss       | -1.48    |\n","|    explained_variance | 
0.908    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24099    |\n","|    policy_loss        | -0.00448 |\n","|    std                | 0.412    |\n","|    value_loss         | 0.000677 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 24200    |\n","|    time_elapsed       | 1304     |\n","|    total_timesteps    | 484000   |\n","| train/                |          |\n","|    entropy_loss       | -1.47    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24199    |\n","|    policy_loss        | 0.0186   |\n","|    std                | 0.411    |\n","|    value_loss         | 0.000364 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24300    |\n","|    time_elapsed       | 1309     |\n","|    total_timesteps    | 486000   |\n","| train/                |          |\n","|    entropy_loss       | -1.46    |\n","|    explained_variance | 0.932    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24299    |\n","|    policy_loss        | -0.00132 |\n","|    std                | 0.41     |\n","|    value_loss         | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.215   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations  
       | 24400    |\n","|    time_elapsed       | 1315     |\n","|    total_timesteps    | 488000   |\n","| train/                |          |\n","|    entropy_loss       | -1.48    |\n","|    explained_variance | 0.947    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24399    |\n","|    policy_loss        | -0.00102 |\n","|    std                | 0.413    |\n","|    value_loss         | 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.52     |\n","|    ep_rew_mean        | -0.195   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24500    |\n","|    time_elapsed       | 1320     |\n","|    total_timesteps    | 490000   |\n","| train/                |          |\n","|    entropy_loss       | -1.48    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24499    |\n","|    policy_loss        | -0.0149  |\n","|    std                | 0.413    |\n","|    value_loss         | 0.000194 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.201   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24600    |\n","|    time_elapsed       | 1325     |\n","|    total_timesteps    | 492000   |\n","| train/                |          |\n","|    entropy_loss       | -1.48    |\n","|    explained_variance | 0.938    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24599    |\n","|    policy_loss        | -0.0183  |\n","|    std                | 0.413    |\n","|    value_loss         | 0.000225 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24700    |\n","|    time_elapsed       | 1330     |\n","|    total_timesteps    | 494000   |\n","| train/                |          |\n","|    entropy_loss       | -1.46    |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24699    |\n","|    policy_loss        | 0.00995  |\n","|    std                | 0.411    |\n","|    value_loss         | 0.000238 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24800    |\n","|    time_elapsed       | 1335     |\n","|    total_timesteps    | 496000   |\n","| train/                |          |\n","|    entropy_loss       | -1.45    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24799    |\n","|    policy_loss        | -0.00953 |\n","|    std                | 0.41     |\n","|    value_loss         | 0.000177 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 24900    |\n","|    time_elapsed       | 1341     |\n","|    total_timesteps    | 498000   |\n","| train/                |          |\n","|    entropy_loss       | -1.42    |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24899   
 |\n","|    policy_loss        | 0.000315 |\n","|    std                | 0.406    |\n","|    value_loss         | 5.8e-05  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.63     |\n","|    ep_rew_mean        | -0.198   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25000    |\n","|    time_elapsed       | 1346     |\n","|    total_timesteps    | 500000   |\n","| train/                |          |\n","|    entropy_loss       | -1.38    |\n","|    explained_variance | 0.919    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24999    |\n","|    policy_loss        | -0.0244  |\n","|    std                | 0.401    |\n","|    value_loss         | 0.000709 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.69      |\n","|    ep_rew_mean        | -0.204    |\n","| time/                 |           |\n","|    fps                | 371       |\n","|    iterations         | 25100     |\n","|    time_elapsed       | 1351      |\n","|    total_timesteps    | 502000    |\n","| train/                |           |\n","|    entropy_loss       | -1.38     |\n","|    explained_variance | 0.957     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 25099     |\n","|    policy_loss        | -0.000708 |\n","|    std                | 0.402     |\n","|    value_loss         | 0.000231  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.95     |\n","|    ep_rew_mean        | -0.227   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25200    |\n","|    time_elapsed       | 1357     |\n","|    
total_timesteps    | 504000   |\n","| train/                |          |\n","|    entropy_loss       | -1.37    |\n","|    explained_variance | 0.954    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25199    |\n","|    policy_loss        | 0.0245   |\n","|    std                | 0.4      |\n","|    value_loss         | 0.00044  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.84     |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25300    |\n","|    time_elapsed       | 1362     |\n","|    total_timesteps    | 506000   |\n","| train/                |          |\n","|    entropy_loss       | -1.39    |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25299    |\n","|    policy_loss        | 0.00448  |\n","|    std                | 0.402    |\n","|    value_loss         | 0.000133 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25400    |\n","|    time_elapsed       | 1367     |\n","|    total_timesteps    | 508000   |\n","| train/                |          |\n","|    entropy_loss       | -1.4     |\n","|    explained_variance | 0.364    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25399    |\n","|    policy_loss        | -0.0354  |\n","|    std                | 0.405    |\n","|    value_loss         | 0.00417  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      
|\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25500    |\n","|    time_elapsed       | 1373     |\n","|    total_timesteps    | 510000   |\n","| train/                |          |\n","|    entropy_loss       | -1.39    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25499    |\n","|    policy_loss        | -0.00203 |\n","|    std                | 0.405    |\n","|    value_loss         | 0.000139 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25600    |\n","|    time_elapsed       | 1377     |\n","|    total_timesteps    | 512000   |\n","| train/                |          |\n","|    entropy_loss       | -1.39    |\n","|    explained_variance | 0.712    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25599    |\n","|    policy_loss        | 0.00121  |\n","|    std                | 0.403    |\n","|    value_loss         | 0.00179  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.219   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25700    |\n","|    time_elapsed       | 1383     |\n","|    total_timesteps    | 514000   |\n","| train/                |          |\n","|    entropy_loss       | -1.36    |\n","|    explained_variance | 0.932    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25699    |\n","|    policy_loss        | -0.00428 |\n","|    std                | 
0.401    |\n","|    value_loss         | 0.000315 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25800    |\n","|    time_elapsed       | 1388     |\n","|    total_timesteps    | 516000   |\n","| train/                |          |\n","|    entropy_loss       | -1.36    |\n","|    explained_variance | 0.208    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25799    |\n","|    policy_loss        | 0.0133   |\n","|    std                | 0.401    |\n","|    value_loss         | 0.00162  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 25900    |\n","|    time_elapsed       | 1394     |\n","|    total_timesteps    | 518000   |\n","| train/                |          |\n","|    entropy_loss       | -1.36    |\n","|    explained_variance | 0.805    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25899    |\n","|    policy_loss        | 0.00214  |\n","|    std                | 0.4      |\n","|    value_loss         | 0.00065  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26000    |\n","|    time_elapsed       | 1399     |\n","|    total_timesteps    | 520000   |\n","| train/                |          |\n","|    
entropy_loss       | -1.34    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25999    |\n","|    policy_loss        | 0.0111   |\n","|    std                | 0.398    |\n","|    value_loss         | 0.000262 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.97     |\n","|    ep_rew_mean        | -0.25    |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26100    |\n","|    time_elapsed       | 1404     |\n","|    total_timesteps    | 522000   |\n","| train/                |          |\n","|    entropy_loss       | -1.33    |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26099    |\n","|    policy_loss        | 0.00785  |\n","|    std                | 0.398    |\n","|    value_loss         | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.219   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26200    |\n","|    time_elapsed       | 1410     |\n","|    total_timesteps    | 524000   |\n","| train/                |          |\n","|    entropy_loss       | -1.31    |\n","|    explained_variance | 0.921    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26199    |\n","|    policy_loss        | -0.00498 |\n","|    std                | 0.395    |\n","|    value_loss         | 0.000306 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.58     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          
|\n","|    fps                | 371      |\n","|    iterations         | 26300    |\n","|    time_elapsed       | 1415     |\n","|    total_timesteps    | 526000   |\n","| train/                |          |\n","|    entropy_loss       | -1.29    |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26299    |\n","|    policy_loss        | -0.00204 |\n","|    std                | 0.393    |\n","|    value_loss         | 6.63e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.56     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26400    |\n","|    time_elapsed       | 1421     |\n","|    total_timesteps    | 528000   |\n","| train/                |          |\n","|    entropy_loss       | -1.29    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26399    |\n","|    policy_loss        | 0.00476  |\n","|    std                | 0.393    |\n","|    value_loss         | 0.000169 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26500    |\n","|    time_elapsed       | 1426     |\n","|    total_timesteps    | 530000   |\n","| train/                |          |\n","|    entropy_loss       | -1.26    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26499    |\n","|    policy_loss        | -0.00918 |\n","|    std                | 0.389    |\n","|    value_loss         | 0.000276 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.89     |\n","|    ep_rew_mean        | -0.233   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26600    |\n","|    time_elapsed       | 1431     |\n","|    total_timesteps    | 532000   |\n","| train/                |          |\n","|    entropy_loss       | -1.26    |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26599    |\n","|    policy_loss        | -0.00635 |\n","|    std                | 0.389    |\n","|    value_loss         | 0.000139 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26700    |\n","|    time_elapsed       | 1437     |\n","|    total_timesteps    | 534000   |\n","| train/                |          |\n","|    entropy_loss       | -1.26    |\n","|    explained_variance | 0.575    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26699    |\n","|    policy_loss        | -0.0391  |\n","|    std                | 0.387    |\n","|    value_loss         | 0.00261  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26800    |\n","|    time_elapsed       | 1442     |\n","|    total_timesteps    | 536000   |\n","| train/                |          |\n","|    entropy_loss       | -1.25    |\n","|    explained_variance | 
0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26799    |\n","|    policy_loss        | 0.0173   |\n","|    std                | 0.387    |\n","|    value_loss         | 0.000495 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.6      |\n","|    ep_rew_mean        | -0.193   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 26900    |\n","|    time_elapsed       | 1448     |\n","|    total_timesteps    | 538000   |\n","| train/                |          |\n","|    entropy_loss       | -1.22    |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26899    |\n","|    policy_loss        | 0.00954  |\n","|    std                | 0.383    |\n","|    value_loss         | 0.000204 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.89     |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27000    |\n","|    time_elapsed       | 1453     |\n","|    total_timesteps    | 540000   |\n","| train/                |          |\n","|    entropy_loss       | -1.21    |\n","|    explained_variance | 0.951    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26999    |\n","|    policy_loss        | -0.00556 |\n","|    std                | 0.382    |\n","|    value_loss         | 0.000222 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.9      |\n","|    ep_rew_mean        | -0.242   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations  
       | 27100    |\n","|    time_elapsed       | 1458     |\n","|    total_timesteps    | 542000   |\n","| train/                |          |\n","|    entropy_loss       | -1.2     |\n","|    explained_variance | 0.957    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27099    |\n","|    policy_loss        | -0.00364 |\n","|    std                | 0.381    |\n","|    value_loss         | 0.000258 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.93     |\n","|    ep_rew_mean        | -0.239   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27200    |\n","|    time_elapsed       | 1464     |\n","|    total_timesteps    | 544000   |\n","| train/                |          |\n","|    entropy_loss       | -1.2     |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27199    |\n","|    policy_loss        | -0.00378 |\n","|    std                | 0.382    |\n","|    value_loss         | 0.000146 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.88     |\n","|    ep_rew_mean        | -0.229   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27300    |\n","|    time_elapsed       | 1469     |\n","|    total_timesteps    | 546000   |\n","| train/                |          |\n","|    entropy_loss       | -1.21    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27299    |\n","|    policy_loss        | -0.0069  |\n","|    std                | 0.382    |\n","|    value_loss         | 0.000229 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.204   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27400    |\n","|    time_elapsed       | 1475     |\n","|    total_timesteps    | 548000   |\n","| train/                |          |\n","|    entropy_loss       | -1.2     |\n","|    explained_variance | 0.973    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27399    |\n","|    policy_loss        | -0.00456 |\n","|    std                | 0.382    |\n","|    value_loss         | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.92     |\n","|    ep_rew_mean        | -0.237   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27500    |\n","|    time_elapsed       | 1480     |\n","|    total_timesteps    | 550000   |\n","| train/                |          |\n","|    entropy_loss       | -1.19    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27499    |\n","|    policy_loss        | 0.00336  |\n","|    std                | 0.381    |\n","|    value_loss         | 0.000271 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.201   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27600    |\n","|    time_elapsed       | 1485     |\n","|    total_timesteps    | 552000   |\n","| train/                |          |\n","|    entropy_loss       | -1.2     |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27599   
 |\n","|    policy_loss        | 0.000766 |\n","|    std                | 0.382    |\n","|    value_loss         | 0.000147 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.57     |\n","|    ep_rew_mean        | -0.201   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27700    |\n","|    time_elapsed       | 1491     |\n","|    total_timesteps    | 554000   |\n","| train/                |          |\n","|    entropy_loss       | -1.18    |\n","|    explained_variance | 0.959    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27699    |\n","|    policy_loss        | -0.0173  |\n","|    std                | 0.379    |\n","|    value_loss         | 0.000264 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 27800    |\n","|    time_elapsed       | 1496     |\n","|    total_timesteps    | 556000   |\n","| train/                |          |\n","|    entropy_loss       | -1.17    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27799    |\n","|    policy_loss        | 0.000551 |\n","|    std                | 0.378    |\n","|    value_loss         | 0.00016  |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.62      |\n","|    ep_rew_mean        | -0.2      |\n","| time/                 |           |\n","|    fps                | 371       |\n","|    iterations         | 27900     |\n","|    time_elapsed       | 1502      |\n","|    
total_timesteps    | 558000    |\n","| train/                |           |\n","|    entropy_loss       | -1.14     |\n","|    explained_variance | 0.941     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 27899     |\n","|    policy_loss        | -0.000696 |\n","|    std                | 0.375     |\n","|    value_loss         | 0.000233  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.92     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28000    |\n","|    time_elapsed       | 1507     |\n","|    total_timesteps    | 560000   |\n","| train/                |          |\n","|    entropy_loss       | -1.17    |\n","|    explained_variance | 0.612    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27999    |\n","|    policy_loss        | -0.0643  |\n","|    std                | 0.378    |\n","|    value_loss         | 0.00586  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28100    |\n","|    time_elapsed       | 1513     |\n","|    total_timesteps    | 562000   |\n","| train/                |          |\n","|    entropy_loss       | -1.16    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28099    |\n","|    policy_loss        | 0.0239   |\n","|    std                | 0.377    |\n","|    value_loss         | 0.000301 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 
2.76     |\n","|    ep_rew_mean        | -0.215   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28200    |\n","|    time_elapsed       | 1519     |\n","|    total_timesteps    | 564000   |\n","| train/                |          |\n","|    entropy_loss       | -1.15    |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28199    |\n","|    policy_loss        | -0.0206  |\n","|    std                | 0.375    |\n","|    value_loss         | 0.000195 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28300    |\n","|    time_elapsed       | 1524     |\n","|    total_timesteps    | 566000   |\n","| train/                |          |\n","|    entropy_loss       | -1.13    |\n","|    explained_variance | 0.97     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28299    |\n","|    policy_loss        | -0.00922 |\n","|    std                | 0.373    |\n","|    value_loss         | 0.000213 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.97     |\n","|    ep_rew_mean        | -0.237   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28400    |\n","|    time_elapsed       | 1530     |\n","|    total_timesteps    | 568000   |\n","| train/                |          |\n","|    entropy_loss       | -1.12    |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28399    |\n","|    policy_loss        | 0.0177   |\n","|    std         
       | 0.373    |\n","|    value_loss         | 0.000324 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 371      |\n","|    iterations         | 28500    |\n","|    time_elapsed       | 1535     |\n","|    total_timesteps    | 570000   |\n","| train/                |          |\n","|    entropy_loss       | -1.12    |\n","|    explained_variance | 0.944    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28499    |\n","|    policy_loss        | -0.00116 |\n","|    std                | 0.373    |\n","|    value_loss         | 0.000274 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 28600    |\n","|    time_elapsed       | 1542     |\n","|    total_timesteps    | 572000   |\n","| train/                |          |\n","|    entropy_loss       | -1.12    |\n","|    explained_variance | 0.574    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28599    |\n","|    policy_loss        | -0.0423  |\n","|    std                | 0.374    |\n","|    value_loss         | 0.00667  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.88     |\n","|    ep_rew_mean        | -0.231   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 28700    |\n","|    time_elapsed       | 1547     |\n","|    total_timesteps    | 574000   |\n","| train/                |          |\n","|    
entropy_loss       | -1.12    |\n","|    explained_variance | 0.947    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28699    |\n","|    policy_loss        | -0.0191  |\n","|    std                | 0.374    |\n","|    value_loss         | 0.0006   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.205   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 28800    |\n","|    time_elapsed       | 1553     |\n","|    total_timesteps    | 576000   |\n","| train/                |          |\n","|    entropy_loss       | -1.1     |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28799    |\n","|    policy_loss        | -0.00246 |\n","|    std                | 0.372    |\n","|    value_loss         | 0.000154 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.83     |\n","|    ep_rew_mean        | -0.226   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 28900    |\n","|    time_elapsed       | 1558     |\n","|    total_timesteps    | 578000   |\n","| train/                |          |\n","|    entropy_loss       | -1.11    |\n","|    explained_variance | 0.943    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28899    |\n","|    policy_loss        | -0.00343 |\n","|    std                | 0.373    |\n","|    value_loss         | 0.000237 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          
|\n","|    fps                | 370      |\n","|    iterations         | 29000    |\n","|    time_elapsed       | 1563     |\n","|    total_timesteps    | 580000   |\n","| train/                |          |\n","|    entropy_loss       | -1.12    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28999    |\n","|    policy_loss        | 0.00808  |\n","|    std                | 0.374    |\n","|    value_loss         | 0.000176 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.195   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29100    |\n","|    time_elapsed       | 1569     |\n","|    total_timesteps    | 582000   |\n","| train/                |          |\n","|    entropy_loss       | -1.09    |\n","|    explained_variance | 0.798    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29099    |\n","|    policy_loss        | -0.0217  |\n","|    std                | 0.372    |\n","|    value_loss         | 0.00186  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3.2      |\n","|    ep_rew_mean        | -0.252   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29200    |\n","|    time_elapsed       | 1574     |\n","|    total_timesteps    | 584000   |\n","| train/                |          |\n","|    entropy_loss       | -1.11    |\n","|    explained_variance | 0.434    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29199    |\n","|    policy_loss        | 0.0171   |\n","|    std                | 0.374    |\n","|    value_loss         | 0.00356  
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29300    |\n","|    time_elapsed       | 1581     |\n","|    total_timesteps    | 586000   |\n","| train/                |          |\n","|    entropy_loss       | -1.1     |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29299    |\n","|    policy_loss        | 0.00606  |\n","|    std                | 0.373    |\n","|    value_loss         | 0.000466 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.84     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29400    |\n","|    time_elapsed       | 1586     |\n","|    total_timesteps    | 588000   |\n","| train/                |          |\n","|    entropy_loss       | -1.1     |\n","|    explained_variance | 0.941    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29399    |\n","|    policy_loss        | 0.0222   |\n","|    std                | 0.374    |\n","|    value_loss         | 0.000908 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29500    |\n","|    time_elapsed       | 1591     |\n","|    total_timesteps    | 590000   |\n","| train/                |          |\n","|    entropy_loss       | -1.1     |\n","|    explained_variance | 
0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29499    |\n","|    policy_loss        | 0.000126 |\n","|    std                | 0.373    |\n","|    value_loss         | 6.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.93     |\n","|    ep_rew_mean        | -0.236   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29600    |\n","|    time_elapsed       | 1597     |\n","|    total_timesteps    | 592000   |\n","| train/                |          |\n","|    entropy_loss       | -1.09    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29599    |\n","|    policy_loss        | -0.0107  |\n","|    std                | 0.372    |\n","|    value_loss         | 0.000203 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 3.26      |\n","|    ep_rew_mean        | -0.264    |\n","| time/                 |           |\n","|    fps                | 370       |\n","|    iterations         | 29700     |\n","|    time_elapsed       | 1602      |\n","|    total_timesteps    | 594000    |\n","| train/                |           |\n","|    entropy_loss       | -1.09     |\n","|    explained_variance | 0.894     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 29699     |\n","|    policy_loss        | -2.96e-05 |\n","|    std                | 0.372     |\n","|    value_loss         | 0.00053   |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 370      
|\n","|    iterations         | 29800    |\n","|    time_elapsed       | 1608     |\n","|    total_timesteps    | 596000   |\n","| train/                |          |\n","|    entropy_loss       | -1.07    |\n","|    explained_variance | 0.933    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29799    |\n","|    policy_loss        | -0.00366 |\n","|    std                | 0.369    |\n","|    value_loss         | 0.000264 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3.13     |\n","|    ep_rew_mean        | -0.242   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 29900    |\n","|    time_elapsed       | 1613     |\n","|    total_timesteps    | 598000   |\n","| train/                |          |\n","|    entropy_loss       | -1.07    |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29899    |\n","|    policy_loss        | 0.0225   |\n","|    std                | 0.369    |\n","|    value_loss         | 0.000667 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30000    |\n","|    time_elapsed       | 1619     |\n","|    total_timesteps    | 600000   |\n","| train/                |          |\n","|    entropy_loss       | -1.05    |\n","|    explained_variance | 0.941    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29999    |\n","|    policy_loss        | 0.0245   |\n","|    std                | 0.367    |\n","|    value_loss         | 0.00122  
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30100    |\n","|    time_elapsed       | 1624     |\n","|    total_timesteps    | 602000   |\n","| train/                |          |\n","|    entropy_loss       | -1.03    |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30099    |\n","|    policy_loss        | 0.0033   |\n","|    std                | 0.365    |\n","|    value_loss         | 0.000116 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30200    |\n","|    time_elapsed       | 1629     |\n","|    total_timesteps    | 604000   |\n","| train/                |          |\n","|    entropy_loss       | -1.03    |\n","|    explained_variance | 0.84     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30199    |\n","|    policy_loss        | -0.0157  |\n","|    std                | 0.365    |\n","|    value_loss         | 0.000663 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.91     |\n","|    ep_rew_mean        | -0.23    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30300    |\n","|    time_elapsed       | 1635     |\n","|    total_timesteps    | 606000   |\n","| train/                |          |\n","|    entropy_loss       | -1.01    |\n","|    explained_variance | 
0.945    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30299    |\n","|    policy_loss        | -0.00954 |\n","|    std                | 0.362    |\n","|    value_loss         | 0.000363 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30400    |\n","|    time_elapsed       | 1640     |\n","|    total_timesteps    | 608000   |\n","| train/                |          |\n","|    entropy_loss       | -0.993   |\n","|    explained_variance | 0.952    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30399    |\n","|    policy_loss        | -0.00869 |\n","|    std                | 0.361    |\n","|    value_loss         | 0.000228 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.94     |\n","|    ep_rew_mean        | -0.236   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30500    |\n","|    time_elapsed       | 1646     |\n","|    total_timesteps    | 610000   |\n","| train/                |          |\n","|    entropy_loss       | -0.945   |\n","|    explained_variance | 0.748    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30499    |\n","|    policy_loss        | -0.0418  |\n","|    std                | 0.355    |\n","|    value_loss         | 0.00359  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations  
       | 30600    |\n","|    time_elapsed       | 1651     |\n","|    total_timesteps    | 612000   |\n","| train/                |          |\n","|    entropy_loss       | -0.926   |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30599    |\n","|    policy_loss        | 0.00479  |\n","|    std                | 0.353    |\n","|    value_loss         | 0.000425 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.88     |\n","|    ep_rew_mean        | -0.226   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30700    |\n","|    time_elapsed       | 1656     |\n","|    total_timesteps    | 614000   |\n","| train/                |          |\n","|    entropy_loss       | -0.926   |\n","|    explained_variance | 0.693    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30699    |\n","|    policy_loss        | -0.014   |\n","|    std                | 0.353    |\n","|    value_loss         | 0.00214  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30800    |\n","|    time_elapsed       | 1662     |\n","|    total_timesteps    | 616000   |\n","| train/                |          |\n","|    entropy_loss       | -0.917   |\n","|    explained_variance | 0.849    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30799    |\n","|    policy_loss        | 0.00243  |\n","|    std                | 0.352    |\n","|    value_loss         | 0.00164  |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.204   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 30900    |\n","|    time_elapsed       | 1667     |\n","|    total_timesteps    | 618000   |\n","| train/                |          |\n","|    entropy_loss       | -0.898   |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30899    |\n","|    policy_loss        | -0.00587 |\n","|    std                | 0.349    |\n","|    value_loss         | 0.000291 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31000    |\n","|    time_elapsed       | 1673     |\n","|    total_timesteps    | 620000   |\n","| train/                |          |\n","|    entropy_loss       | -0.925   |\n","|    explained_variance | 0.957    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30999    |\n","|    policy_loss        | -0.0221  |\n","|    std                | 0.352    |\n","|    value_loss         | 0.000565 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31100    |\n","|    time_elapsed       | 1677     |\n","|    total_timesteps    | 622000   |\n","| train/                |          |\n","|    entropy_loss       | -0.925   |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31099   
 |\n","|    policy_loss        | -0.00369 |\n","|    std                | 0.352    |\n","|    value_loss         | 0.000136 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.59     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31200    |\n","|    time_elapsed       | 1682     |\n","|    total_timesteps    | 624000   |\n","| train/                |          |\n","|    entropy_loss       | -0.915   |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31199    |\n","|    policy_loss        | 0.00117  |\n","|    std                | 0.351    |\n","|    value_loss         | 0.000228 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31300    |\n","|    time_elapsed       | 1688     |\n","|    total_timesteps    | 626000   |\n","| train/                |          |\n","|    entropy_loss       | -0.902   |\n","|    explained_variance | 0.967    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31299    |\n","|    policy_loss        | 0.00434  |\n","|    std                | 0.35     |\n","|    value_loss         | 0.000194 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.53     |\n","|    ep_rew_mean        | -0.18    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31400    |\n","|    time_elapsed       | 1693     |\n","|    total_timesteps    
| 628000   |\n","| train/                |          |\n","|    entropy_loss       | -0.889   |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31399    |\n","|    policy_loss        | -0.00948 |\n","|    std                | 0.349    |\n","|    value_loss         | 0.000186 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31500    |\n","|    time_elapsed       | 1699     |\n","|    total_timesteps    | 630000   |\n","| train/                |          |\n","|    entropy_loss       | -0.868   |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31499    |\n","|    policy_loss        | -0.00141 |\n","|    std                | 0.346    |\n","|    value_loss         | 0.000148 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31600    |\n","|    time_elapsed       | 1704     |\n","|    total_timesteps    | 632000   |\n","| train/                |          |\n","|    entropy_loss       | -0.852   |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31599    |\n","|    policy_loss        | 0.00312  |\n","|    std                | 0.345    |\n","|    value_loss         | 7.1e-05  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.49     |\n","|    
ep_rew_mean        | -0.178   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31700    |\n","|    time_elapsed       | 1709     |\n","|    total_timesteps    | 634000   |\n","| train/                |          |\n","|    entropy_loss       | -0.85    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31699    |\n","|    policy_loss        | 0.000295 |\n","|    std                | 0.345    |\n","|    value_loss         | 0.000322 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31800    |\n","|    time_elapsed       | 1715     |\n","|    total_timesteps    | 636000   |\n","| train/                |          |\n","|    entropy_loss       | -0.843   |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31799    |\n","|    policy_loss        | -0.0121  |\n","|    std                | 0.345    |\n","|    value_loss         | 0.000503 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 31900    |\n","|    time_elapsed       | 1720     |\n","|    total_timesteps    | 638000   |\n","| train/                |          |\n","|    entropy_loss       | -0.81    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31899    |\n","|    policy_loss        | -0.00257 |\n","|    std                | 0.341    
|\n","|    value_loss         | 0.000209 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.205   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32000    |\n","|    time_elapsed       | 1727     |\n","|    total_timesteps    | 640000   |\n","| train/                |          |\n","|    entropy_loss       | -0.773   |\n","|    explained_variance | 0.967    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31999    |\n","|    policy_loss        | 0.0135   |\n","|    std                | 0.336    |\n","|    value_loss         | 0.000448 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32100    |\n","|    time_elapsed       | 1733     |\n","|    total_timesteps    | 642000   |\n","| train/                |          |\n","|    entropy_loss       | -0.781   |\n","|    explained_variance | 0.922    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32099    |\n","|    policy_loss        | -0.00758 |\n","|    std                | 0.337    |\n","|    value_loss         | 0.000887 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.62     |\n","|    ep_rew_mean        | -0.198   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32200    |\n","|    time_elapsed       | 1738     |\n","|    total_timesteps    | 644000   |\n","| train/                |          |\n","|    entropy_loss       | 
-0.759   |\n","|    explained_variance | 0.951    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32199    |\n","|    policy_loss        | -0.00883 |\n","|    std                | 0.335    |\n","|    value_loss         | 0.000575 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.205   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32300    |\n","|    time_elapsed       | 1744     |\n","|    total_timesteps    | 646000   |\n","| train/                |          |\n","|    entropy_loss       | -0.745   |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32299    |\n","|    policy_loss        | -0.0011  |\n","|    std                | 0.334    |\n","|    value_loss         | 8.33e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32400    |\n","|    time_elapsed       | 1749     |\n","|    total_timesteps    | 648000   |\n","| train/                |          |\n","|    entropy_loss       | -0.758   |\n","|    explained_variance | 0.891    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32399    |\n","|    policy_loss        | 0.00367  |\n","|    std                | 0.336    |\n","|    value_loss         | 0.00107  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps         
       | 370      |\n","|    iterations         | 32500    |\n","|    time_elapsed       | 1755     |\n","|    total_timesteps    | 650000   |\n","| train/                |          |\n","|    entropy_loss       | -0.767   |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32499    |\n","|    policy_loss        | -0.00398 |\n","|    std                | 0.337    |\n","|    value_loss         | 0.000223 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.201   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32600    |\n","|    time_elapsed       | 1760     |\n","|    total_timesteps    | 652000   |\n","| train/                |          |\n","|    entropy_loss       | -0.778   |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32599    |\n","|    policy_loss        | 0.00605  |\n","|    std                | 0.338    |\n","|    value_loss         | 0.000407 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.79     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32700    |\n","|    time_elapsed       | 1766     |\n","|    total_timesteps    | 654000   |\n","| train/                |          |\n","|    entropy_loss       | -0.768   |\n","|    explained_variance | 0.951    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32699    |\n","|    policy_loss        | -0.00468 |\n","|    std                | 0.338    |\n","|    value_loss         | 0.00026  
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32800    |\n","|    time_elapsed       | 1771     |\n","|    total_timesteps    | 656000   |\n","| train/                |          |\n","|    entropy_loss       | -0.786   |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32799    |\n","|    policy_loss        | 0.00403  |\n","|    std                | 0.339    |\n","|    value_loss         | 0.000409 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 32900    |\n","|    time_elapsed       | 1776     |\n","|    total_timesteps    | 658000   |\n","| train/                |          |\n","|    entropy_loss       | -0.778   |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32899    |\n","|    policy_loss        | 0.00136  |\n","|    std                | 0.339    |\n","|    value_loss         | 9.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33000    |\n","|    time_elapsed       | 1782     |\n","|    total_timesteps    | 660000   |\n","| train/                |          |\n","|    entropy_loss       | -0.758   |\n","|    explained_variance | 
0.887    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32999    |\n","|    policy_loss        | 0.0131   |\n","|    std                | 0.337    |\n","|    value_loss         | 0.000436 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33100    |\n","|    time_elapsed       | 1787     |\n","|    total_timesteps    | 662000   |\n","| train/                |          |\n","|    entropy_loss       | -0.758   |\n","|    explained_variance | 0.801    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33099    |\n","|    policy_loss        | -0.0263  |\n","|    std                | 0.337    |\n","|    value_loss         | 0.00151  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33200    |\n","|    time_elapsed       | 1793     |\n","|    total_timesteps    | 664000   |\n","| train/                |          |\n","|    entropy_loss       | -0.745   |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33199    |\n","|    policy_loss        | -0.00329 |\n","|    std                | 0.335    |\n","|    value_loss         | 0.000105 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.59     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations  
       | 33300    |\n","|    time_elapsed       | 1798     |\n","|    total_timesteps    | 666000   |\n","| train/                |          |\n","|    entropy_loss       | -0.729   |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33299    |\n","|    policy_loss        | -0.0155  |\n","|    std                | 0.334    |\n","|    value_loss         | 0.000204 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33400    |\n","|    time_elapsed       | 1803     |\n","|    total_timesteps    | 668000   |\n","| train/                |          |\n","|    entropy_loss       | -0.695   |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33399    |\n","|    policy_loss        | -0.00419 |\n","|    std                | 0.331    |\n","|    value_loss         | 0.000118 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33500    |\n","|    time_elapsed       | 1809     |\n","|    total_timesteps    | 670000   |\n","| train/                |          |\n","|    entropy_loss       | -0.689   |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33499    |\n","|    policy_loss        | -0.00792 |\n","|    std                | 0.331    |\n","|    value_loss         | 0.000152 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.196   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33600    |\n","|    time_elapsed       | 1814     |\n","|    total_timesteps    | 672000   |\n","| train/                |          |\n","|    entropy_loss       | -0.672   |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33599    |\n","|    policy_loss        | 0.00425  |\n","|    std                | 0.33     |\n","|    value_loss         | 0.000116 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.86     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33700    |\n","|    time_elapsed       | 1820     |\n","|    total_timesteps    | 674000   |\n","| train/                |          |\n","|    entropy_loss       | -0.653   |\n","|    explained_variance | 0.955    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33699    |\n","|    policy_loss        | -0.00029 |\n","|    std                | 0.329    |\n","|    value_loss         | 0.000335 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.61     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 33800    |\n","|    time_elapsed       | 1825     |\n","|    total_timesteps    | 676000   |\n","| train/                |          |\n","|    entropy_loss       | -0.651   |\n","|    explained_variance | 0.942    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33799   
 |\n","|    policy_loss        | -0.0063  |\n","|    std                | 0.329    |\n","|    value_loss         | 0.000432 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.8       |\n","|    ep_rew_mean        | -0.222    |\n","| time/                 |           |\n","|    fps                | 370       |\n","|    iterations         | 33900     |\n","|    time_elapsed       | 1830      |\n","|    total_timesteps    | 678000    |\n","| train/                |           |\n","|    entropy_loss       | -0.643    |\n","|    explained_variance | 0.973     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 33899     |\n","|    policy_loss        | -0.000768 |\n","|    std                | 0.328     |\n","|    value_loss         | 0.000148  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34000    |\n","|    time_elapsed       | 1836     |\n","|    total_timesteps    | 680000   |\n","| train/                |          |\n","|    entropy_loss       | -0.632   |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33999    |\n","|    policy_loss        | 0.00146  |\n","|    std                | 0.327    |\n","|    value_loss         | 0.000182 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34100    |\n","|    time_elapsed       | 1841     |\n","|    
total_timesteps    | 682000   |\n","| train/                |          |\n","|    entropy_loss       | -0.627   |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34099    |\n","|    policy_loss        | -0.0095  |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000314 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34200    |\n","|    time_elapsed       | 1847     |\n","|    total_timesteps    | 684000   |\n","| train/                |          |\n","|    entropy_loss       | -0.631   |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34199    |\n","|    policy_loss        | 0.0112   |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000291 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.63     |\n","|    ep_rew_mean        | -0.198   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34300    |\n","|    time_elapsed       | 1852     |\n","|    total_timesteps    | 686000   |\n","| train/                |          |\n","|    entropy_loss       | -0.618   |\n","|    explained_variance | 0.964    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34299    |\n","|    policy_loss        | 0.0161   |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000308 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     
|\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34400    |\n","|    time_elapsed       | 1857     |\n","|    total_timesteps    | 688000   |\n","| train/                |          |\n","|    entropy_loss       | -0.614   |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34399    |\n","|    policy_loss        | 0.00919  |\n","|    std                | 0.325    |\n","|    value_loss         | 0.000281 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34500    |\n","|    time_elapsed       | 1863     |\n","|    total_timesteps    | 690000   |\n","| train/                |          |\n","|    entropy_loss       | -0.618   |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34499    |\n","|    policy_loss        | 0.00651  |\n","|    std                | 0.325    |\n","|    value_loss         | 7.66e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.84     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34600    |\n","|    time_elapsed       | 1868     |\n","|    total_timesteps    | 692000   |\n","| train/                |          |\n","|    entropy_loss       | -0.624   |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34599    |\n","|    policy_loss        | 0.00316  |\n","|    std                | 
0.326    |\n","|    value_loss         | 0.000231 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.9      |\n","|    ep_rew_mean        | -0.233   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34700    |\n","|    time_elapsed       | 1874     |\n","|    total_timesteps    | 694000   |\n","| train/                |          |\n","|    entropy_loss       | -0.626   |\n","|    explained_variance | 0.919    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34699    |\n","|    policy_loss        | 0.00283  |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000538 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34800    |\n","|    time_elapsed       | 1879     |\n","|    total_timesteps    | 696000   |\n","| train/                |          |\n","|    entropy_loss       | -0.619   |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34799    |\n","|    policy_loss        | 0.02     |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000824 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.97     |\n","|    ep_rew_mean        | -0.239   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 34900    |\n","|    time_elapsed       | 1885     |\n","|    total_timesteps    | 698000   |\n","| train/                |          |\n","|    
entropy_loss       | -0.621   |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34899    |\n","|    policy_loss        | 0.000574 |\n","|    std                | 0.326    |\n","|    value_loss         | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35000    |\n","|    time_elapsed       | 1890     |\n","|    total_timesteps    | 700000   |\n","| train/                |          |\n","|    entropy_loss       | -0.617   |\n","|    explained_variance | 0.941    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34999    |\n","|    policy_loss        | 0.00347  |\n","|    std                | 0.325    |\n","|    value_loss         | 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.208   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35100    |\n","|    time_elapsed       | 1895     |\n","|    total_timesteps    | 702000   |\n","| train/                |          |\n","|    entropy_loss       | -0.606   |\n","|    explained_variance | 0.913    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35099    |\n","|    policy_loss        | 0.0074   |\n","|    std                | 0.323    |\n","|    value_loss         | 0.000453 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          
|\n","|    fps                | 370      |\n","|    iterations         | 35200    |\n","|    time_elapsed       | 1901     |\n","|    total_timesteps    | 704000   |\n","| train/                |          |\n","|    entropy_loss       | -0.618   |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35199    |\n","|    policy_loss        | -0.0156  |\n","|    std                | 0.323    |\n","|    value_loss         | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35300    |\n","|    time_elapsed       | 1906     |\n","|    total_timesteps    | 706000   |\n","| train/                |          |\n","|    entropy_loss       | -0.599   |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35299    |\n","|    policy_loss        | -0.00947 |\n","|    std                | 0.321    |\n","|    value_loss         | 0.000158 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35400    |\n","|    time_elapsed       | 1912     |\n","|    total_timesteps    | 708000   |\n","| train/                |          |\n","|    entropy_loss       | -0.588   |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35399    |\n","|    policy_loss        | -0.00901 |\n","|    std                | 0.32     |\n","|    value_loss         | 8.99e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35500    |\n","|    time_elapsed       | 1917     |\n","|    total_timesteps    | 710000   |\n","| train/                |          |\n","|    entropy_loss       | -0.563   |\n","|    explained_variance | 0.953    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35499    |\n","|    policy_loss        | -0.00164 |\n","|    std                | 0.318    |\n","|    value_loss         | 0.000211 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35600    |\n","|    time_elapsed       | 1922     |\n","|    total_timesteps    | 712000   |\n","| train/                |          |\n","|    entropy_loss       | -0.551   |\n","|    explained_variance | 0.955    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35599    |\n","|    policy_loss        | 0.00301  |\n","|    std                | 0.317    |\n","|    value_loss         | 0.000355 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.85     |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35700    |\n","|    time_elapsed       | 1928     |\n","|    total_timesteps    | 714000   |\n","| train/                |          |\n","|    entropy_loss       | -0.544   |\n","|    explained_variance | 
0.954    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35699    |\n","|    policy_loss        | -0.006   |\n","|    std                | 0.316    |\n","|    value_loss         | 0.000688 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3.03     |\n","|    ep_rew_mean        | -0.245   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35800    |\n","|    time_elapsed       | 1933     |\n","|    total_timesteps    | 716000   |\n","| train/                |          |\n","|    entropy_loss       | -0.552   |\n","|    explained_variance | 0.577    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35799    |\n","|    policy_loss        | 0.0161   |\n","|    std                | 0.317    |\n","|    value_loss         | 0.00323  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 35900    |\n","|    time_elapsed       | 1939     |\n","|    total_timesteps    | 718000   |\n","| train/                |          |\n","|    entropy_loss       | -0.554   |\n","|    explained_variance | 0.936    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35899    |\n","|    policy_loss        | -0.00517 |\n","|    std                | 0.317    |\n","|    value_loss         | 0.000629 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.83     |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations  
       | 36000    |\n","|    time_elapsed       | 1944     |\n","|    total_timesteps    | 720000   |\n","| train/                |          |\n","|    entropy_loss       | -0.547   |\n","|    explained_variance | 0.951    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35999    |\n","|    policy_loss        | 0.00271  |\n","|    std                | 0.317    |\n","|    value_loss         | 0.000222 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.226   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 36100    |\n","|    time_elapsed       | 1950     |\n","|    total_timesteps    | 722000   |\n","| train/                |          |\n","|    entropy_loss       | -0.533   |\n","|    explained_variance | 0.957    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36099    |\n","|    policy_loss        | -0.00146 |\n","|    std                | 0.316    |\n","|    value_loss         | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 36200    |\n","|    time_elapsed       | 1956     |\n","|    total_timesteps    | 724000   |\n","| train/                |          |\n","|    entropy_loss       | -0.51    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36199    |\n","|    policy_loss        | 0.0105   |\n","|    std                | 0.314    |\n","|    value_loss         | 0.000754 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 36300    |\n","|    time_elapsed       | 1961     |\n","|    total_timesteps    | 726000   |\n","| train/                |          |\n","|    entropy_loss       | -0.486   |\n","|    explained_variance | 0.954    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36299    |\n","|    policy_loss        | 0.00552  |\n","|    std                | 0.311    |\n","|    value_loss         | 0.000373 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 36400    |\n","|    time_elapsed       | 1967     |\n","|    total_timesteps    | 728000   |\n","| train/                |          |\n","|    entropy_loss       | -0.477   |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36399    |\n","|    policy_loss        | -0.0127  |\n","|    std                | 0.311    |\n","|    value_loss         | 0.000318 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.56     |\n","|    ep_rew_mean        | -0.2     |\n","| time/                 |          |\n","|    fps                | 370      |\n","|    iterations         | 36500    |\n","|    time_elapsed       | 1972     |\n","|    total_timesteps    | 730000   |\n","| train/                |          |\n","|    entropy_loss       | -0.467   |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36499   
 |\n","|    policy_loss        | 0.00378  |\n","|    std                | 0.31     |\n","|    value_loss         | 0.00012  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 36600    |\n","|    time_elapsed       | 1979     |\n","|    total_timesteps    | 732000   |\n","| train/                |          |\n","|    entropy_loss       | -0.473   |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36599    |\n","|    policy_loss        | 0.00482  |\n","|    std                | 0.31     |\n","|    value_loss         | 0.000204 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 36700    |\n","|    time_elapsed       | 1984     |\n","|    total_timesteps    | 734000   |\n","| train/                |          |\n","|    entropy_loss       | -0.455   |\n","|    explained_variance | 0.872    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36699    |\n","|    policy_loss        | 0.00152  |\n","|    std                | 0.308    |\n","|    value_loss         | 0.000986 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 36800    |\n","|    time_elapsed       | 1989     |\n","|    total_timesteps    
| 736000   |\n","| train/                |          |\n","|    entropy_loss       | -0.451   |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36799    |\n","|    policy_loss        | -0.00245 |\n","|    std                | 0.308    |\n","|    value_loss         | 0.000164 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 36900    |\n","|    time_elapsed       | 1995     |\n","|    total_timesteps    | 738000   |\n","| train/                |          |\n","|    entropy_loss       | -0.44    |\n","|    explained_variance | 0.973    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36899    |\n","|    policy_loss        | -0.0021  |\n","|    std                | 0.308    |\n","|    value_loss         | 0.000133 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37000    |\n","|    time_elapsed       | 2000     |\n","|    total_timesteps    | 740000   |\n","| train/                |          |\n","|    entropy_loss       | -0.422   |\n","|    explained_variance | 0.818    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36999    |\n","|    policy_loss        | 0.00654  |\n","|    std                | 0.306    |\n","|    value_loss         | 0.0013   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    
ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37100    |\n","|    time_elapsed       | 2007     |\n","|    total_timesteps    | 742000   |\n","| train/                |          |\n","|    entropy_loss       | -0.42    |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37099    |\n","|    policy_loss        | -0.0197  |\n","|    std                | 0.306    |\n","|    value_loss         | 0.000427 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.87     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37200    |\n","|    time_elapsed       | 2012     |\n","|    total_timesteps    | 744000   |\n","| train/                |          |\n","|    entropy_loss       | -0.418   |\n","|    explained_variance | 0.738    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37199    |\n","|    policy_loss        | 0.013    |\n","|    std                | 0.307    |\n","|    value_loss         | 0.00267  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.62     |\n","|    ep_rew_mean        | -0.19    |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37300    |\n","|    time_elapsed       | 2018     |\n","|    total_timesteps    | 746000   |\n","| train/                |          |\n","|    entropy_loss       | -0.429   |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37299    |\n","|    policy_loss        | 0.00391  |\n","|    std                | 0.307    
|\n","|    value_loss         | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37400    |\n","|    time_elapsed       | 2023     |\n","|    total_timesteps    | 748000   |\n","| train/                |          |\n","|    entropy_loss       | -0.417   |\n","|    explained_variance | 0.96     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37399    |\n","|    policy_loss        | 0.00301  |\n","|    std                | 0.306    |\n","|    value_loss         | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.79     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37500    |\n","|    time_elapsed       | 2029     |\n","|    total_timesteps    | 750000   |\n","| train/                |          |\n","|    entropy_loss       | -0.41    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37499    |\n","|    policy_loss        | 0.00954  |\n","|    std                | 0.305    |\n","|    value_loss         | 0.000347 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37600    |\n","|    time_elapsed       | 2035     |\n","|    total_timesteps    | 752000   |\n","| train/                |          |\n","|    entropy_loss       | 
-0.401   |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37599    |\n","|    policy_loss        | -0.00145 |\n","|    std                | 0.304    |\n","|    value_loss         | 0.000125 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.98     |\n","|    ep_rew_mean        | -0.243   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37700    |\n","|    time_elapsed       | 2040     |\n","|    total_timesteps    | 754000   |\n","| train/                |          |\n","|    entropy_loss       | -0.391   |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37699    |\n","|    policy_loss        | 0.00078  |\n","|    std                | 0.303    |\n","|    value_loss         | 0.000243 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 37800    |\n","|    time_elapsed       | 2046     |\n","|    total_timesteps    | 756000   |\n","| train/                |          |\n","|    entropy_loss       | -0.378   |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37799    |\n","|    policy_loss        | 0.00376  |\n","|    std                | 0.302    |\n","|    value_loss         | 0.000143 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.215   |\n","| time/                 |          |\n","|    fps         
       | 369      |\n","|    iterations         | 37900    |\n","|    time_elapsed       | 2051     |\n","|    total_timesteps    | 758000   |\n","| train/                |          |\n","|    entropy_loss       | -0.367   |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37899    |\n","|    policy_loss        | 0.00106  |\n","|    std                | 0.301    |\n","|    value_loss         | 7.36e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.91     |\n","|    ep_rew_mean        | -0.239   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38000    |\n","|    time_elapsed       | 2056     |\n","|    total_timesteps    | 760000   |\n","| train/                |          |\n","|    entropy_loss       | -0.347   |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37999    |\n","|    policy_loss        | 0.00239  |\n","|    std                | 0.299    |\n","|    value_loss         | 0.000211 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.59     |\n","|    ep_rew_mean        | -0.198   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38100    |\n","|    time_elapsed       | 2062     |\n","|    total_timesteps    | 762000   |\n","| train/                |          |\n","|    entropy_loss       | -0.357   |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38099    |\n","|    policy_loss        | -0.00176 |\n","|    std                | 0.301    |\n","|    value_loss         | 0.000155 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38200    |\n","|    time_elapsed       | 2066     |\n","|    total_timesteps    | 764000   |\n","| train/                |          |\n","|    entropy_loss       | -0.34    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38199    |\n","|    policy_loss        | -0.00693 |\n","|    std                | 0.299    |\n","|    value_loss         | 8.58e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.79     |\n","|    ep_rew_mean        | -0.227   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38300    |\n","|    time_elapsed       | 2073     |\n","|    total_timesteps    | 766000   |\n","| train/                |          |\n","|    entropy_loss       | -0.335   |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38299    |\n","|    policy_loss        | 0.00789  |\n","|    std                | 0.299    |\n","|    value_loss         | 0.000405 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38400    |\n","|    time_elapsed       | 2078     |\n","|    total_timesteps    | 768000   |\n","| train/                |          |\n","|    entropy_loss       | -0.337   |\n","|    explained_variance | 
0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38399    |\n","|    policy_loss        | -0.00164 |\n","|    std                | 0.298    |\n","|    value_loss         | 7.49e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.195   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38500    |\n","|    time_elapsed       | 2084     |\n","|    total_timesteps    | 770000   |\n","| train/                |          |\n","|    entropy_loss       | -0.323   |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38499    |\n","|    policy_loss        | 0.000334 |\n","|    std                | 0.297    |\n","|    value_loss         | 7.01e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38600    |\n","|    time_elapsed       | 2089     |\n","|    total_timesteps    | 772000   |\n","| train/                |          |\n","|    entropy_loss       | -0.314   |\n","|    explained_variance | 0.96     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38599    |\n","|    policy_loss        | 0.00254  |\n","|    std                | 0.296    |\n","|    value_loss         | 0.000353 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.83     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations  
       | 38700    |\n","|    time_elapsed       | 2095     |\n","|    total_timesteps    | 774000   |\n","| train/                |          |\n","|    entropy_loss       | -0.324   |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38699    |\n","|    policy_loss        | 0.000697 |\n","|    std                | 0.299    |\n","|    value_loss         | 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38800    |\n","|    time_elapsed       | 2100     |\n","|    total_timesteps    | 776000   |\n","| train/                |          |\n","|    entropy_loss       | -0.306   |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38799    |\n","|    policy_loss        | -0.00347 |\n","|    std                | 0.298    |\n","|    value_loss         | 0.000229 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 38900    |\n","|    time_elapsed       | 2105     |\n","|    total_timesteps    | 778000   |\n","| train/                |          |\n","|    entropy_loss       | -0.318   |\n","|    explained_variance | 0.655    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38899    |\n","|    policy_loss        | 0.000817 |\n","|    std                | 0.3      |\n","|    value_loss         | 0.00321  |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39000    |\n","|    time_elapsed       | 2111     |\n","|    total_timesteps    | 780000   |\n","| train/                |          |\n","|    entropy_loss       | -0.297   |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38999    |\n","|    policy_loss        | -0.00423 |\n","|    std                | 0.298    |\n","|    value_loss         | 7.47e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39100    |\n","|    time_elapsed       | 2116     |\n","|    total_timesteps    | 782000   |\n","| train/                |          |\n","|    entropy_loss       | -0.282   |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39099    |\n","|    policy_loss        | -0.00336 |\n","|    std                | 0.297    |\n","|    value_loss         | 8.87e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39200    |\n","|    time_elapsed       | 2122     |\n","|    total_timesteps    | 784000   |\n","| train/                |          |\n","|    entropy_loss       | -0.271   |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39199   
 |\n","|    policy_loss        | 0.00463  |\n","|    std                | 0.295    |\n","|    value_loss         | 0.000164 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.216   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39300    |\n","|    time_elapsed       | 2128     |\n","|    total_timesteps    | 786000   |\n","| train/                |          |\n","|    entropy_loss       | -0.286   |\n","|    explained_variance | 0.835    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39299    |\n","|    policy_loss        | -0.0073  |\n","|    std                | 0.297    |\n","|    value_loss         | 0.00105  |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.78      |\n","|    ep_rew_mean        | -0.231    |\n","| time/                 |           |\n","|    fps                | 369       |\n","|    iterations         | 39400     |\n","|    time_elapsed       | 2133      |\n","|    total_timesteps    | 788000    |\n","| train/                |           |\n","|    entropy_loss       | -0.26     |\n","|    explained_variance | 0.982     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 39399     |\n","|    policy_loss        | -0.000717 |\n","|    std                | 0.294     |\n","|    value_loss         | 0.000136  |\n","-------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.66      |\n","|    ep_rew_mean        | -0.205    |\n","| time/                 |           |\n","|    fps                | 369       |\n","|    iterations         | 39500     |\n","|    time_elapsed       | 2139      
|\n","|    total_timesteps    | 790000    |\n","| train/                |           |\n","|    entropy_loss       | -0.238    |\n","|    explained_variance | 0.996     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 39499     |\n","|    policy_loss        | -2.25e-05 |\n","|    std                | 0.293     |\n","|    value_loss         | 3.68e-05  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39600    |\n","|    time_elapsed       | 2144     |\n","|    total_timesteps    | 792000   |\n","| train/                |          |\n","|    entropy_loss       | -0.23    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39599    |\n","|    policy_loss        | 0.00124  |\n","|    std                | 0.292    |\n","|    value_loss         | 0.000345 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39700    |\n","|    time_elapsed       | 2151     |\n","|    total_timesteps    | 794000   |\n","| train/                |          |\n","|    entropy_loss       | -0.223   |\n","|    explained_variance | 0.955    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39699    |\n","|    policy_loss        | -0.00696 |\n","|    std                | 0.292    |\n","|    value_loss         | 0.000445 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    
ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39800    |\n","|    time_elapsed       | 2156     |\n","|    total_timesteps    | 796000   |\n","| train/                |          |\n","|    entropy_loss       | -0.202   |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39799    |\n","|    policy_loss        | 0.00756  |\n","|    std                | 0.291    |\n","|    value_loss         | 0.000269 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.62     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 39900    |\n","|    time_elapsed       | 2161     |\n","|    total_timesteps    | 798000   |\n","| train/                |          |\n","|    entropy_loss       | -0.201   |\n","|    explained_variance | 0.939    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39899    |\n","|    policy_loss        | -0.0023  |\n","|    std                | 0.291    |\n","|    value_loss         | 0.0004   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.81     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 40000    |\n","|    time_elapsed       | 2167     |\n","|    total_timesteps    | 800000   |\n","| train/                |          |\n","|    entropy_loss       | -0.21    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39999    |\n","|    policy_loss        | 0.000684 
|\n","|    std                | 0.292    |\n","|    value_loss         | 0.000195 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 40100    |\n","|    time_elapsed       | 2172     |\n","|    total_timesteps    | 802000   |\n","| train/                |          |\n","|    entropy_loss       | -0.215   |\n","|    explained_variance | 0.958    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40099    |\n","|    policy_loss        | -0.0137  |\n","|    std                | 0.292    |\n","|    value_loss         | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 40200    |\n","|    time_elapsed       | 2178     |\n","|    total_timesteps    | 804000   |\n","| train/                |          |\n","|    entropy_loss       | -0.215   |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40199    |\n","|    policy_loss        | 0.00292  |\n","|    std                | 0.292    |\n","|    value_loss         | 0.000183 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.94     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 40300    |\n","|    time_elapsed       | 2183     |\n","|    total_timesteps    | 806000   |\n","| train/                | 
         |\n","|    entropy_loss       | -0.208   |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40299    |\n","|    policy_loss        | 0.00246  |\n","|    std                | 0.291    |\n","|    value_loss         | 8.77e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 40400    |\n","|    time_elapsed       | 2189     |\n","|    total_timesteps    | 808000   |\n","| train/                |          |\n","|    entropy_loss       | -0.224   |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40399    |\n","|    policy_loss        | -0.0059  |\n","|    std                | 0.293    |\n","|    value_loss         | 0.0002   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 369      |\n","|    iterations         | 40500    |\n","|    time_elapsed       | 2194     |\n","|    total_timesteps    | 810000   |\n","| train/                |          |\n","|    entropy_loss       | -0.213   |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40499    |\n","|    policy_loss        | -0.00346 |\n","|    std                | 0.292    |\n","|    value_loss         | 5.02e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.56     |\n","|    ep_rew_mean        | -0.188   |\n","| time/          
       |          |\n","|    fps                | 369      |\n","|    iterations         | 40600    |\n","|    time_elapsed       | 2199     |\n","|    total_timesteps    | 812000   |\n","| train/                |          |\n","|    entropy_loss       | -0.2     |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40599    |\n","|    policy_loss        | -0.0023  |\n","|    std                | 0.291    |\n","|    value_loss         | 0.00013  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 40700    |\n","|    time_elapsed       | 2207     |\n","|    total_timesteps    | 814000   |\n","| train/                |          |\n","|    entropy_loss       | -0.196   |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40699    |\n","|    policy_loss        | 0.00152  |\n","|    std                | 0.29     |\n","|    value_loss         | 0.000202 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 40800    |\n","|    time_elapsed       | 2212     |\n","|    total_timesteps    | 816000   |\n","| train/                |          |\n","|    entropy_loss       | -0.183   |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40799    |\n","|    policy_loss        | -0.00116 |\n","|    std                | 0.289    |\n","|    value_loss         | 7.77e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.57     |\n","|    ep_rew_mean        | -0.192   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 40900    |\n","|    time_elapsed       | 2218     |\n","|    total_timesteps    | 818000   |\n","| train/                |          |\n","|    entropy_loss       | -0.151   |\n","|    explained_variance | 0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40899    |\n","|    policy_loss        | -0.00194 |\n","|    std                | 0.286    |\n","|    value_loss         | 0.000199 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.57     |\n","|    ep_rew_mean        | -0.198   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41000    |\n","|    time_elapsed       | 2223     |\n","|    total_timesteps    | 820000   |\n","| train/                |          |\n","|    entropy_loss       | -0.135   |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40999    |\n","|    policy_loss        | 0.000996 |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000121 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41100    |\n","|    time_elapsed       | 2229     |\n","|    total_timesteps    | 822000   |\n","| train/                |          |\n","|    entropy_loss       | -0.126   |\n","|    explained_variance | 
0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41099    |\n","|    policy_loss        | -0.00212 |\n","|    std                | 0.284    |\n","|    value_loss         | 0.000156 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.57     |\n","|    ep_rew_mean        | -0.199   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41200    |\n","|    time_elapsed       | 2234     |\n","|    total_timesteps    | 824000   |\n","| train/                |          |\n","|    entropy_loss       | -0.126   |\n","|    explained_variance | 0.968    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41199    |\n","|    policy_loss        | -0.00282 |\n","|    std                | 0.284    |\n","|    value_loss         | 0.000188 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.96     |\n","|    ep_rew_mean        | -0.24    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41300    |\n","|    time_elapsed       | 2240     |\n","|    total_timesteps    | 826000   |\n","| train/                |          |\n","|    entropy_loss       | -0.123   |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41299    |\n","|    policy_loss        | 0.00622  |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000636 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations  
       | 41400    |\n","|    time_elapsed       | 2246     |\n","|    total_timesteps    | 828000   |\n","| train/                |          |\n","|    entropy_loss       | -0.116   |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41399    |\n","|    policy_loss        | -0.00257 |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.215   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41500    |\n","|    time_elapsed       | 2251     |\n","|    total_timesteps    | 830000   |\n","| train/                |          |\n","|    entropy_loss       | -0.113   |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41499    |\n","|    policy_loss        | 0.00336  |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000249 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.7       |\n","|    ep_rew_mean        | -0.205    |\n","| time/                 |           |\n","|    fps                | 368       |\n","|    iterations         | 41600     |\n","|    time_elapsed       | 2257      |\n","|    total_timesteps    | 832000    |\n","| train/                |           |\n","|    entropy_loss       | -0.115    |\n","|    explained_variance | 0.991     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 41599     |\n","|    policy_loss        | -8.09e-06 |\n","|    std                | 0.286     |\n","|    value_loss         | 8.17e-05  
|\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.63     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41700    |\n","|    time_elapsed       | 2262     |\n","|    total_timesteps    | 834000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0992  |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41699    |\n","|    policy_loss        | -0.00135 |\n","|    std                | 0.286    |\n","|    value_loss         | 0.000217 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.9      |\n","|    ep_rew_mean        | -0.239   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41800    |\n","|    time_elapsed       | 2269     |\n","|    total_timesteps    | 836000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0974  |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41799    |\n","|    policy_loss        | 0.00715  |\n","|    std                | 0.286    |\n","|    value_loss         | 0.000153 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.59     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 41900    |\n","|    time_elapsed       | 2274     |\n","|    total_timesteps    | 838000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0765  |\n","|    explained_variance 
| 0.973    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41899    |\n","|    policy_loss        | 0.00194  |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000179 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.77      |\n","|    ep_rew_mean        | -0.22     |\n","| time/                 |           |\n","|    fps                | 368       |\n","|    iterations         | 42000     |\n","|    time_elapsed       | 2279      |\n","|    total_timesteps    | 840000    |\n","| train/                |           |\n","|    entropy_loss       | -0.0793   |\n","|    explained_variance | 0.979     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 41999     |\n","|    policy_loss        | -0.000296 |\n","|    std                | 0.286     |\n","|    value_loss         | 0.000206  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42100    |\n","|    time_elapsed       | 2285     |\n","|    total_timesteps    | 842000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0627  |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42099    |\n","|    policy_loss        | 0.000554 |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000218 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 368      
|\n","|    iterations         | 42200    |\n","|    time_elapsed       | 2290     |\n","|    total_timesteps    | 844000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0515  |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42199    |\n","|    policy_loss        | -0.00696 |\n","|    std                | 0.285    |\n","|    value_loss         | 0.000283 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42300    |\n","|    time_elapsed       | 2296     |\n","|    total_timesteps    | 846000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0408  |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42299    |\n","|    policy_loss        | -0.00423 |\n","|    std                | 0.283    |\n","|    value_loss         | 0.000156 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.221   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42400    |\n","|    time_elapsed       | 2302     |\n","|    total_timesteps    | 848000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0206  |\n","|    explained_variance | 0.863    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42399    |\n","|    policy_loss        | -0.012   |\n","|    std                | 0.281    |\n","|    value_loss         | 0.00221  
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.86     |\n","|    ep_rew_mean        | -0.219   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42500    |\n","|    time_elapsed       | 2307     |\n","|    total_timesteps    | 850000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0021  |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42499    |\n","|    policy_loss        | -0.00534 |\n","|    std                | 0.279    |\n","|    value_loss         | 0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.64     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42600    |\n","|    time_elapsed       | 2313     |\n","|    total_timesteps    | 852000   |\n","| train/                |          |\n","|    entropy_loss       | -0.0125  |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42599    |\n","|    policy_loss        | 0.00384  |\n","|    std                | 0.281    |\n","|    value_loss         | 0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42700    |\n","|    time_elapsed       | 2318     |\n","|    total_timesteps    | 854000   |\n","| train/                |          |\n","|    entropy_loss       | 0.00768  |\n","|    explained_variance | 
0.948    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42699    |\n","|    policy_loss        | -0.00122 |\n","|    std                | 0.279    |\n","|    value_loss         | 0.000569 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.62      |\n","|    ep_rew_mean        | -0.209    |\n","| time/                 |           |\n","|    fps                | 368       |\n","|    iterations         | 42800     |\n","|    time_elapsed       | 2324      |\n","|    total_timesteps    | 856000    |\n","| train/                |           |\n","|    entropy_loss       | 0.0185    |\n","|    explained_variance | 0.988     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 42799     |\n","|    policy_loss        | -0.000285 |\n","|    std                | 0.279     |\n","|    value_loss         | 8.38e-05  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 42900    |\n","|    time_elapsed       | 2330     |\n","|    total_timesteps    | 858000   |\n","| train/                |          |\n","|    entropy_loss       | 0.0162   |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42899    |\n","|    policy_loss        | 0.00484  |\n","|    std                | 0.28     |\n","|    value_loss         | 0.000399 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.65     |\n","|    ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 368      
|\n","|    iterations         | 43000    |\n","|    time_elapsed       | 2336     |\n","|    total_timesteps    | 860000   |\n","| train/                |          |\n","|    entropy_loss       | 0.0316   |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42999    |\n","|    policy_loss        | 0.00017  |\n","|    std                | 0.28     |\n","|    value_loss         | 0.000129 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.82     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43100    |\n","|    time_elapsed       | 2341     |\n","|    total_timesteps    | 862000   |\n","| train/                |          |\n","|    entropy_loss       | 0.0497   |\n","|    explained_variance | 0.916    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43099    |\n","|    policy_loss        | 0.00899  |\n","|    std                | 0.278    |\n","|    value_loss         | 0.000573 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.58     |\n","|    ep_rew_mean        | -0.196   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43200    |\n","|    time_elapsed       | 2346     |\n","|    total_timesteps    | 864000   |\n","| train/                |          |\n","|    entropy_loss       | 0.0705   |\n","|    explained_variance | 0.97     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43199    |\n","|    policy_loss        | -0.0026  |\n","|    std                | 0.276    |\n","|    value_loss         | 0.000282 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43300    |\n","|    time_elapsed       | 2351     |\n","|    total_timesteps    | 866000   |\n","| train/                |          |\n","|    entropy_loss       | 0.0972   |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43299    |\n","|    policy_loss        | 0.00171  |\n","|    std                | 0.273    |\n","|    value_loss         | 0.000169 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43400    |\n","|    time_elapsed       | 2356     |\n","|    total_timesteps    | 868000   |\n","| train/                |          |\n","|    entropy_loss       | 0.103    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43399    |\n","|    policy_loss        | -0.00518 |\n","|    std                | 0.272    |\n","|    value_loss         | 0.000144 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43500    |\n","|    time_elapsed       | 2362     |\n","|    total_timesteps    | 870000   |\n","| train/                |          |\n","|    entropy_loss       | 0.124    |\n","|    explained_variance | 
0.74     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43499    |\n","|    policy_loss        | 0.0128   |\n","|    std                | 0.27     |\n","|    value_loss         | 0.00289  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.53     |\n","|    ep_rew_mean        | -0.185   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43600    |\n","|    time_elapsed       | 2367     |\n","|    total_timesteps    | 872000   |\n","| train/                |          |\n","|    entropy_loss       | 0.14     |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43599    |\n","|    policy_loss        | 4.74e-05 |\n","|    std                | 0.269    |\n","|    value_loss         | 0.000178 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43700    |\n","|    time_elapsed       | 2373     |\n","|    total_timesteps    | 874000   |\n","| train/                |          |\n","|    entropy_loss       | 0.15     |\n","|    explained_variance | 0.87     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43699    |\n","|    policy_loss        | -0.00248 |\n","|    std                | 0.269    |\n","|    value_loss         | 0.00118  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.208   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations  
       | 43800    |\n","|    time_elapsed       | 2377     |\n","|    total_timesteps    | 876000   |\n","| train/                |          |\n","|    entropy_loss       | 0.153    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43799    |\n","|    policy_loss        | 0.000131 |\n","|    std                | 0.269    |\n","|    value_loss         | 0.000181 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.227   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 43900    |\n","|    time_elapsed       | 2383     |\n","|    total_timesteps    | 878000   |\n","| train/                |          |\n","|    entropy_loss       | 0.166    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43899    |\n","|    policy_loss        | -0.0044  |\n","|    std                | 0.268    |\n","|    value_loss         | 0.00018  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44000    |\n","|    time_elapsed       | 2388     |\n","|    total_timesteps    | 880000   |\n","| train/                |          |\n","|    entropy_loss       | 0.171    |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43999    |\n","|    policy_loss        | -0.00274 |\n","|    std                | 0.268    |\n","|    value_loss         | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 2.91     |\n","|    ep_rew_mean        | -0.226   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44100    |\n","|    time_elapsed       | 2394     |\n","|    total_timesteps    | 882000   |\n","| train/                |          |\n","|    entropy_loss       | 0.18     |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44099    |\n","|    policy_loss        | 0.00268  |\n","|    std                | 0.268    |\n","|    value_loss         | 0.000135 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44200    |\n","|    time_elapsed       | 2400     |\n","|    total_timesteps    | 884000   |\n","| train/                |          |\n","|    entropy_loss       | 0.187    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44199    |\n","|    policy_loss        | 0.00523  |\n","|    std                | 0.268    |\n","|    value_loss         | 0.000261 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.69     |\n","|    ep_rew_mean        | -0.201   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44300    |\n","|    time_elapsed       | 2404     |\n","|    total_timesteps    | 886000   |\n","| train/                |          |\n","|    entropy_loss       | 0.193    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44299   
 |\n","|    policy_loss        | -0.00104 |\n","|    std                | 0.268    |\n","|    value_loss         | 5.67e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.99     |\n","|    ep_rew_mean        | -0.227   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44400    |\n","|    time_elapsed       | 2410     |\n","|    total_timesteps    | 888000   |\n","| train/                |          |\n","|    entropy_loss       | 0.184    |\n","|    explained_variance | 0.795    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44399    |\n","|    policy_loss        | 0.00176  |\n","|    std                | 0.269    |\n","|    value_loss         | 0.000957 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44500    |\n","|    time_elapsed       | 2415     |\n","|    total_timesteps    | 890000   |\n","| train/                |          |\n","|    entropy_loss       | 0.187    |\n","|    explained_variance | 0.955    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44499    |\n","|    policy_loss        | -0.0046  |\n","|    std                | 0.269    |\n","|    value_loss         | 0.000338 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44600    |\n","|    time_elapsed       | 2421     |\n","|    total_timesteps    
| 892000   |\n","| train/                |          |\n","|    entropy_loss       | 0.184    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44599    |\n","|    policy_loss        | 0.00029  |\n","|    std                | 0.269    |\n","|    value_loss         | 0.000338 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.87     |\n","|    ep_rew_mean        | -0.234   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44700    |\n","|    time_elapsed       | 2426     |\n","|    total_timesteps    | 894000   |\n","| train/                |          |\n","|    entropy_loss       | 0.182    |\n","|    explained_variance | 0.801    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44699    |\n","|    policy_loss        | -0.0241  |\n","|    std                | 0.27     |\n","|    value_loss         | 0.00115  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.84     |\n","|    ep_rew_mean        | -0.225   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44800    |\n","|    time_elapsed       | 2432     |\n","|    total_timesteps    | 896000   |\n","| train/                |          |\n","|    entropy_loss       | 0.196    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44799    |\n","|    policy_loss        | -0.00151 |\n","|    std                | 0.269    |\n","|    value_loss         | 0.00019  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.61     |\n","|    
ep_rew_mean        | -0.202   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 44900    |\n","|    time_elapsed       | 2437     |\n","|    total_timesteps    | 898000   |\n","| train/                |          |\n","|    entropy_loss       | 0.185    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44899    |\n","|    policy_loss        | -0.0031  |\n","|    std                | 0.271    |\n","|    value_loss         | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45000    |\n","|    time_elapsed       | 2442     |\n","|    total_timesteps    | 900000   |\n","| train/                |          |\n","|    entropy_loss       | 0.198    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44999    |\n","|    policy_loss        | 0.00106  |\n","|    std                | 0.271    |\n","|    value_loss         | 0.000171 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.203   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45100    |\n","|    time_elapsed       | 2448     |\n","|    total_timesteps    | 902000   |\n","| train/                |          |\n","|    entropy_loss       | 0.225    |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45099    |\n","|    policy_loss        | 0.00321  |\n","|    std                | 0.269    
|\n","|    value_loss         | 0.000174 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.72     |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45200    |\n","|    time_elapsed       | 2453     |\n","|    total_timesteps    | 904000   |\n","| train/                |          |\n","|    entropy_loss       | 0.24     |\n","|    explained_variance | 0.952    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45199    |\n","|    policy_loss        | -0.00208 |\n","|    std                | 0.267    |\n","|    value_loss         | 0.000242 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.66     |\n","|    ep_rew_mean        | -0.2     |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45300    |\n","|    time_elapsed       | 2459     |\n","|    total_timesteps    | 906000   |\n","| train/                |          |\n","|    entropy_loss       | 0.271    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45299    |\n","|    policy_loss        | -0.00553 |\n","|    std                | 0.265    |\n","|    value_loss         | 0.000167 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.67     |\n","|    ep_rew_mean        | -0.204   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45400    |\n","|    time_elapsed       | 2463     |\n","|    total_timesteps    | 908000   |\n","| train/                |          |\n","|    entropy_loss       | 
0.269    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45399    |\n","|    policy_loss        | 0.00133  |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000363 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45500    |\n","|    time_elapsed       | 2469     |\n","|    total_timesteps    | 910000   |\n","| train/                |          |\n","|    entropy_loss       | 0.276    |\n","|    explained_variance | 0.964    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45499    |\n","|    policy_loss        | -0.00835 |\n","|    std                | 0.265    |\n","|    value_loss         | 0.000288 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.76     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45600    |\n","|    time_elapsed       | 2474     |\n","|    total_timesteps    | 912000   |\n","| train/                |          |\n","|    entropy_loss       | 0.268    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45599    |\n","|    policy_loss        | -0.00317 |\n","|    std                | 0.267    |\n","|    value_loss         | 0.000172 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.79     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps         
       | 368      |\n","|    iterations         | 45700    |\n","|    time_elapsed       | 2479     |\n","|    total_timesteps    | 914000   |\n","| train/                |          |\n","|    entropy_loss       | 0.268    |\n","|    explained_variance | 0.964    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45699    |\n","|    policy_loss        | 0.00148  |\n","|    std                | 0.267    |\n","|    value_loss         | 0.000399 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.83     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 45800    |\n","|    time_elapsed       | 2485     |\n","|    total_timesteps    | 916000   |\n","| train/                |          |\n","|    entropy_loss       | 0.279    |\n","|    explained_variance | 0.91     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45799    |\n","|    policy_loss        | 0.0106   |\n","|    std                | 0.267    |\n","|    value_loss         | 0.000879 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.54      |\n","|    ep_rew_mean        | -0.194    |\n","| time/                 |           |\n","|    fps                | 368       |\n","|    iterations         | 45900     |\n","|    time_elapsed       | 2490      |\n","|    total_timesteps    | 918000    |\n","| train/                |           |\n","|    entropy_loss       | 0.262     |\n","|    explained_variance | 0.988     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 45899     |\n","|    policy_loss        | -0.000651 |\n","|    std                | 0.268     |\n","|    value_loss         | 0.000179  
|\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.214   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46000    |\n","|    time_elapsed       | 2496     |\n","|    total_timesteps    | 920000   |\n","| train/                |          |\n","|    entropy_loss       | 0.262    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45999    |\n","|    policy_loss        | -0.00303 |\n","|    std                | 0.27     |\n","|    value_loss         | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.79     |\n","|    ep_rew_mean        | -0.211   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46100    |\n","|    time_elapsed       | 2500     |\n","|    total_timesteps    | 922000   |\n","| train/                |          |\n","|    entropy_loss       | 0.282    |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46099    |\n","|    policy_loss        | -0.00638 |\n","|    std                | 0.268    |\n","|    value_loss         | 0.000137 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.61     |\n","|    ep_rew_mean        | -0.193   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46200    |\n","|    time_elapsed       | 2506     |\n","|    total_timesteps    | 924000   |\n","| train/                |          |\n","|    entropy_loss       | 0.304    |\n","|    explained_variance 
| 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46199    |\n","|    policy_loss        | 0.00416  |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000145 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.86     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46300    |\n","|    time_elapsed       | 2511     |\n","|    total_timesteps    | 926000   |\n","| train/                |          |\n","|    entropy_loss       | 0.289    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46299    |\n","|    policy_loss        | 0.00131  |\n","|    std                | 0.267    |\n","|    value_loss         | 0.000178 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.83     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46400    |\n","|    time_elapsed       | 2516     |\n","|    total_timesteps    | 928000   |\n","| train/                |          |\n","|    entropy_loss       | 0.302    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46399    |\n","|    policy_loss        | 0.00128  |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000431 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.59     |\n","|    ep_rew_mean        | -0.194   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    
iterations         | 46500    |\n","|    time_elapsed       | 2522     |\n","|    total_timesteps    | 930000   |\n","| train/                |          |\n","|    entropy_loss       | 0.304    |\n","|    explained_variance | 0.961    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46499    |\n","|    policy_loss        | -0.0111  |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000262 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.212   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46600    |\n","|    time_elapsed       | 2527     |\n","|    total_timesteps    | 932000   |\n","| train/                |          |\n","|    entropy_loss       | 0.308    |\n","|    explained_variance | 0.945    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46599    |\n","|    policy_loss        | 0.00992  |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000554 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.77     |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46700    |\n","|    time_elapsed       | 2533     |\n","|    total_timesteps    | 934000   |\n","| train/                |          |\n","|    entropy_loss       | 0.304    |\n","|    explained_variance | 0.97     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46699    |\n","|    policy_loss        | -0.00132 |\n","|    std                | 0.266    |\n","|    value_loss         | 0.000187 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.22    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46800    |\n","|    time_elapsed       | 2538     |\n","|    total_timesteps    | 936000   |\n","| train/                |          |\n","|    entropy_loss       | 0.32     |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46799    |\n","|    policy_loss        | -0.00506 |\n","|    std                | 0.264    |\n","|    value_loss         | 0.000267 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.57     |\n","|    ep_rew_mean        | -0.208   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 46900    |\n","|    time_elapsed       | 2543     |\n","|    total_timesteps    | 938000   |\n","| train/                |          |\n","|    entropy_loss       | 0.336    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46899    |\n","|    policy_loss        | -0.00688 |\n","|    std                | 0.263    |\n","|    value_loss         | 0.00018  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.21    |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47000    |\n","|    time_elapsed       | 2549     |\n","|    total_timesteps    | 940000   |\n","| train/                |          |\n","|    entropy_loss       | 0.341    |\n","|    explained_variance | 
0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46999    |\n","|    policy_loss        | -0.0197  |\n","|    std                | 0.262    |\n","|    value_loss         | 0.000537 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.68     |\n","|    ep_rew_mean        | -0.208   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47100    |\n","|    time_elapsed       | 2554     |\n","|    total_timesteps    | 942000   |\n","| train/                |          |\n","|    entropy_loss       | 0.343    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47099    |\n","|    policy_loss        | -0.00182 |\n","|    std                | 0.263    |\n","|    value_loss         | 0.000144 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.6      |\n","|    ep_rew_mean        | -0.205   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47200    |\n","|    time_elapsed       | 2559     |\n","|    total_timesteps    | 944000   |\n","| train/                |          |\n","|    entropy_loss       | 0.351    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47199    |\n","|    policy_loss        | 0.00507  |\n","|    std                | 0.262    |\n","|    value_loss         | 9.76e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.74     |\n","|    ep_rew_mean        | -0.213   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations  
       | 47300    |\n","|    time_elapsed       | 2564     |\n","|    total_timesteps    | 946000   |\n","| train/                |          |\n","|    entropy_loss       | 0.367    |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47299    |\n","|    policy_loss        | -0.00172 |\n","|    std                | 0.261    |\n","|    value_loss         | 0.00016  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47400    |\n","|    time_elapsed       | 2570     |\n","|    total_timesteps    | 948000   |\n","| train/                |          |\n","|    entropy_loss       | 0.37     |\n","|    explained_variance | 0.888    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47399    |\n","|    policy_loss        | 0.102    |\n","|    std                | 0.261    |\n","|    value_loss         | 0.0469   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 5.1      |\n","|    ep_rew_mean        | -0.472   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47500    |\n","|    time_elapsed       | 2575     |\n","|    total_timesteps    | 950000   |\n","| train/                |          |\n","|    entropy_loss       | 0.389    |\n","|    explained_variance | 0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47499    |\n","|    policy_loss        | -0.072   |\n","|    std                | 0.259    |\n","|    value_loss         | 0.072    |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 5.64     |\n","|    ep_rew_mean        | -0.586   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47600    |\n","|    time_elapsed       | 2581     |\n","|    total_timesteps    | 952000   |\n","| train/                |          |\n","|    entropy_loss       | 0.376    |\n","|    explained_variance | 0.71     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47599    |\n","|    policy_loss        | -0.294   |\n","|    std                | 0.259    |\n","|    value_loss         | 2.33     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 6.25     |\n","|    ep_rew_mean        | -0.629   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47700    |\n","|    time_elapsed       | 2586     |\n","|    total_timesteps    | 954000   |\n","| train/                |          |\n","|    entropy_loss       | 0.381    |\n","|    explained_variance | 0.967    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47699    |\n","|    policy_loss        | 0.257    |\n","|    std                | 0.259    |\n","|    value_loss         | 0.465    |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 5.3      |\n","|    ep_rew_mean        | -0.563   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47800    |\n","|    time_elapsed       | 2592     |\n","|    total_timesteps    | 956000   |\n","| train/                |          |\n","|    entropy_loss       | 0.394    |\n","|    explained_variance | 0.963    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47799   
 |\n","|    policy_loss        | 0.666    |\n","|    std                | 0.257    |\n","|    value_loss         | 0.873    |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 4.31     |\n","|    ep_rew_mean        | -0.342   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 47900    |\n","|    time_elapsed       | 2598     |\n","|    total_timesteps    | 958000   |\n","| train/                |          |\n","|    entropy_loss       | 0.421    |\n","|    explained_variance | -25.6    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47899    |\n","|    policy_loss        | 0.537    |\n","|    std                | 0.254    |\n","|    value_loss         | 4.94     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3.96     |\n","|    ep_rew_mean        | -0.317   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48000    |\n","|    time_elapsed       | 2603     |\n","|    total_timesteps    | 960000   |\n","| train/                |          |\n","|    entropy_loss       | 0.439    |\n","|    explained_variance | 0.44     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47999    |\n","|    policy_loss        | -0.0174  |\n","|    std                | 0.253    |\n","|    value_loss         | 0.0244   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.92     |\n","|    ep_rew_mean        | -0.233   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48100    |\n","|    time_elapsed       | 2609     |\n","|    total_timesteps    
| 962000   |\n","| train/                |          |\n","|    entropy_loss       | 0.452    |\n","|    explained_variance | 0.72     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48099    |\n","|    policy_loss        | 0.0104   |\n","|    std                | 0.252    |\n","|    value_loss         | 0.0267   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.224   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48200    |\n","|    time_elapsed       | 2613     |\n","|    total_timesteps    | 964000   |\n","| train/                |          |\n","|    entropy_loss       | 0.464    |\n","|    explained_variance | -0.27    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48199    |\n","|    policy_loss        | -0.0121  |\n","|    std                | 0.251    |\n","|    value_loss         | 0.0119   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.86     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48300    |\n","|    time_elapsed       | 2619     |\n","|    total_timesteps    | 966000   |\n","| train/                |          |\n","|    entropy_loss       | 0.471    |\n","|    explained_variance | 0.367    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48299    |\n","|    policy_loss        | 0.0417   |\n","|    std                | 0.25     |\n","|    value_loss         | 0.0082   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3.01     |\n","|    
ep_rew_mean        | -0.238   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48400    |\n","|    time_elapsed       | 2624     |\n","|    total_timesteps    | 968000   |\n","| train/                |          |\n","|    entropy_loss       | 0.476    |\n","|    explained_variance | 0.0691   |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48399    |\n","|    policy_loss        | 0.0139   |\n","|    std                | 0.25     |\n","|    value_loss         | 0.0138   |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 3        |\n","|    ep_rew_mean        | -0.234   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48500    |\n","|    time_elapsed       | 2630     |\n","|    total_timesteps    | 970000   |\n","| train/                |          |\n","|    entropy_loss       | 0.458    |\n","|    explained_variance | 0.69     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48499    |\n","|    policy_loss        | -0.00865 |\n","|    std                | 0.252    |\n","|    value_loss         | 0.00166  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.91     |\n","|    ep_rew_mean        | -0.227   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48600    |\n","|    time_elapsed       | 2635     |\n","|    total_timesteps    | 972000   |\n","| train/                |          |\n","|    entropy_loss       | 0.471    |\n","|    explained_variance | 0.68     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48599    |\n","|    policy_loss        | -0.00833 |\n","|    std                | 0.251    
|\n","|    value_loss         | 0.00228  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.92     |\n","|    ep_rew_mean        | -0.233   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48700    |\n","|    time_elapsed       | 2640     |\n","|    total_timesteps    | 974000   |\n","| train/                |          |\n","|    entropy_loss       | 0.488    |\n","|    explained_variance | 0.727    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48699    |\n","|    policy_loss        | 0.00807  |\n","|    std                | 0.25     |\n","|    value_loss         | 0.00116  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.217   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48800    |\n","|    time_elapsed       | 2646     |\n","|    total_timesteps    | 976000   |\n","| train/                |          |\n","|    entropy_loss       | 0.49     |\n","|    explained_variance | 0.806    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48799    |\n","|    policy_loss        | 0.0138   |\n","|    std                | 0.25     |\n","|    value_loss         | 0.00104  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.87     |\n","|    ep_rew_mean        | -0.228   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 48900    |\n","|    time_elapsed       | 2650     |\n","|    total_timesteps    | 978000   |\n","| train/                |          |\n","|    entropy_loss       | 
0.475    |\n","|    explained_variance | 0.828    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48899    |\n","|    policy_loss        | 0.0108   |\n","|    std                | 0.25     |\n","|    value_loss         | 0.00147  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.218   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49000    |\n","|    time_elapsed       | 2656     |\n","|    total_timesteps    | 980000   |\n","| train/                |          |\n","|    entropy_loss       | 0.477    |\n","|    explained_variance | 0.85     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48999    |\n","|    policy_loss        | 0.00286  |\n","|    std                | 0.25     |\n","|    value_loss         | 0.00109  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.8      |\n","|    ep_rew_mean        | -0.222   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49100    |\n","|    time_elapsed       | 2661     |\n","|    total_timesteps    | 982000   |\n","| train/                |          |\n","|    entropy_loss       | 0.48     |\n","|    explained_variance | 0.967    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49099    |\n","|    policy_loss        | -0.00159 |\n","|    std                | 0.25     |\n","|    value_loss         | 0.000527 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.98     |\n","|    ep_rew_mean        | -0.233   |\n","| time/                 |          |\n","|    fps         
       | 368      |\n","|    iterations         | 49200    |\n","|    time_elapsed       | 2667     |\n","|    total_timesteps    | 984000   |\n","| train/                |          |\n","|    entropy_loss       | 0.493    |\n","|    explained_variance | 0.943    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49199    |\n","|    policy_loss        | 0.00435  |\n","|    std                | 0.249    |\n","|    value_loss         | 0.000571 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 2.84      |\n","|    ep_rew_mean        | -0.222    |\n","| time/                 |           |\n","|    fps                | 368       |\n","|    iterations         | 49300     |\n","|    time_elapsed       | 2672      |\n","|    total_timesteps    | 986000    |\n","| train/                |           |\n","|    entropy_loss       | 0.486     |\n","|    explained_variance | 0.918     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 49299     |\n","|    policy_loss        | -0.000171 |\n","|    std                | 0.25      |\n","|    value_loss         | 0.000799  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.65     |\n","|    ep_rew_mean        | -0.204   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49400    |\n","|    time_elapsed       | 2677     |\n","|    total_timesteps    | 988000   |\n","| train/                |          |\n","|    entropy_loss       | 0.486    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49399    |\n","|    policy_loss        | -0.00756 |\n","|    std                | 0.251    |\n","|    value_loss         | 0.000684 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.71     |\n","|    ep_rew_mean        | -0.206   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49500    |\n","|    time_elapsed       | 2683     |\n","|    total_timesteps    | 990000   |\n","| train/                |          |\n","|    entropy_loss       | 0.488    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49499    |\n","|    policy_loss        | -0.0265  |\n","|    std                | 0.251    |\n","|    value_loss         | 0.000959 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.75     |\n","|    ep_rew_mean        | -0.223   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49600    |\n","|    time_elapsed       | 2689     |\n","|    total_timesteps    | 992000   |\n","| train/                |          |\n","|    entropy_loss       | 0.494    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49599    |\n","|    policy_loss        | -0.00128 |\n","|    std                | 0.25     |\n","|    value_loss         | 0.000325 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.73     |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49700    |\n","|    time_elapsed       | 2695     |\n","|    total_timesteps    | 994000   |\n","| train/                |          |\n","|    entropy_loss       | 0.497    |\n","|    explained_variance | 
0.926    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49699    |\n","|    policy_loss        | -0.00763 |\n","|    std                | 0.25     |\n","|    value_loss         | 0.000821 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.7      |\n","|    ep_rew_mean        | -0.209   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49800    |\n","|    time_elapsed       | 2700     |\n","|    total_timesteps    | 996000   |\n","| train/                |          |\n","|    entropy_loss       | 0.501    |\n","|    explained_variance | 0.953    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49799    |\n","|    policy_loss        | 0.00254  |\n","|    std                | 0.249    |\n","|    value_loss         | 0.000549 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.78     |\n","|    ep_rew_mean        | -0.207   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations         | 49900    |\n","|    time_elapsed       | 2706     |\n","|    total_timesteps    | 998000   |\n","| train/                |          |\n","|    entropy_loss       | 0.514    |\n","|    explained_variance | 0.943    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49899    |\n","|    policy_loss        | -0.0087  |\n","|    std                | 0.248    |\n","|    value_loss         | 0.000554 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 2.62     |\n","|    ep_rew_mean        | -0.197   |\n","| time/                 |          |\n","|    fps                | 368      |\n","|    iterations  
       | 50000    |\n","|    time_elapsed       | 2711     |\n","|    total_timesteps    | 1000000  |\n","| train/                |          |\n","|    entropy_loss       | 0.51     |\n","|    explained_variance | 0.94     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49999    |\n","|    policy_loss        | 0.0144   |\n","|    std                | 0.249    |\n","|    value_loss         | 0.000603 |\n","------------------------------------\n"]},{"data":{"text/plain":["<stable_baselines3.a2c.a2c.A2C at 0x79a90ee30340>"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["model.learn(1_000_000)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MfYtjj19cKFr"},"outputs":[],"source":["# Save the model and  VecNormalize statistics when saving the agent\n","model.save(\"a2c-PandaReachDense-v3\")\n","env.save(\"vec_normalize.pkl\")"]},{"cell_type":"markdown","metadata":{"id":"01M9GCd32Ig-"},"source":["### Evaluate the agent 📈\n","- Now that our agent is trained, we need to **check its performance**.\n","- Stable-Baselines3 provides a method to do that: `evaluate_policy`"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":400,"status":"ok","timestamp":1697797344047,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"liirTVoDkHq3","outputId":"eb7de6dc-2b1b-426b-c0d8-f886e0305c2b"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mean reward = -0.22 +/- 0.11\n"]},{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n","  warnings.warn(\n"]}],"source":["from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n","\n","# Load the saved statistics\n","eval_env = DummyVecEnv([lambda: gym.make(\"PandaReachDense-v3\")])\n","eval_env = VecNormalize.load(\"vec_normalize.pkl\", eval_env)\n","\n","# We need to override the render_mode\n","eval_env.render_mode = \"rgb_array\"\n","\n","#  do not update them at test time\n","eval_env.training = False\n","# reward normalization is not needed at test time\n","eval_env.norm_reward = False\n","\n","# Load the agent\n","model = A2C.load(\"a2c-PandaReachDense-v3\")\n","\n","mean_reward, std_reward = evaluate_policy(model, eval_env)\n","\n","print(f\"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}\")"]},{"cell_type":"markdown","metadata":{"id":"44L9LVQaavR8"},"source":["### Publish your trained model on the Hub 🔥\n","Now that we have seen good results after training, we can publish our trained model on the Hub with one line of code.\n","\n","📚 The library's documentation 👉 https://github.com/huggingface/huggingface_sb3/tree/main#hugging-face--x-stable-baselines3-v20\n"]},{"cell_type":"markdown","metadata":{"id":"MkMk99m8bgaQ"},"source":["By using `package_to_hub`, as we already mentioned in the former units, **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"JquRrWytA6eo"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not 
already done) create an account to HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","<img src=\"https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/notebooks/create-token.jpg\" alt=\"Create HF Token\">\n","\n","- Copy the token\n","- Run the cell below and paste the token"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["9cacf8dbc7a94fa481e1cc40676692c8","e1bcfc8d49874c0a87984a1519ce9a02","9c37b1a06401475892e32e64cfc7228b","7d05761914c6458abd83d3320d5e6774","4fe7ce97f96c4d3ea8d42decfd98c7fd","3e23a54fdb7044d38aaf172664698871","3158042f4a7747b083201b336c509833","9d9554707a1842c9a21b1d7df3a6e8ea","005095e28ba94fab9433cc3e46079300","8d515b922b3149ef997885c4a1b8f57d","76fca4ceb1b74730ad80914a5526a33a","0b24db20f40e460c91fa1090d0286e80","0cd333170d704dde92b6826238507f38","4f7f847624ad487ba3e8e0c136fd86ee","b8228ef61d2147368167e35c75a14b2b","9b9e93708a824114abdb56ffb3a6bbdd","9dd1a10e441c4a3da2c870f40c55d047","5f30f9cbb28f4826af8b9c9b87ec5b88","18bbbe029e1f42bea69347d5de28d2cb","ffa9936e6f3846b9b215c6e03396106e","d0f67ad35369477185dbcf6b0a7c07c3","04bbbd03a7c94340a458d0dcd7f1bca5","8c3c6264b6284db3957ed7f15a90a601","70f52659a1d84dc7ac1ed4648c4de55d","a0f8c961d3c64db08c3555bd566fe955","db52efeddaff4046aad1067fefe78656","d7a899e7eaa8452fb8caf7f064ca96c3","66a44c2579634b54860458073bec84be","4e1d4dfdb773409d8f24f035608de8ab","d8bdb828b07b442eb51a0499c552dc5a","32c2bbf73b5549e28273810c28419711","fbdfe5c1594543138016fb9cd417930e"]},"executionInfo":{"elapsed":203,"status":"ok","timestamp":1697806081201,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"GZiFBBlzxzxY","outputId":"9bb2cd88-a0bd-46bd-d867-0cb36685e805"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"9cacf8dbc7a94fa481e1cc40676692c8"}},"metadata":{}}],"source":["notebook_login()\n","!git config --global credential.helper store"]},{"cell_type":"markdown","metadata":{"id":"_tsf2uv0g_4p"},"source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"]},{"cell_type":"markdown","metadata":{"id":"FGNh9VsZok0i"},"source":["3️⃣ We're now ready to push our trained agent to the 🤗 Hub 🔥 using `package_to_hub()` function"]},{"cell_type":"markdown","metadata":{"id":"juxItTNf1W74"},"source":["For this environment, **running this cell can take approximately 
10min**"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":543,"referenced_widgets":["cf3e525363dc4c6c89daa667732a68b8","20ae4498ab524acaab04f0d21cf2a8ce","678252b688c84e33b3bc832756ac68d2","f8329a0a6dea43db9879e5d011af5fee","0ff9d960df92419f9fd288dd6b618071","3fc0651477c24fbf81d97bd5dcb841f8","75ad83ef32ec418a94e095aebc0cb985","e8bc7022a6874202aa151b4db213f72c","73c9dfddecb44fcfa87786c9d65e274e","bf3de84002be4b77813017549727ab3b","a856c29927664d8b9321d85b5b3cb3dc","bd79e66fd19d45298dc1d6547942cf32","89ef19f39dce4674a2a6146c8eb70146","af9d0aa3438a49638d7d7e4ded636508","ad5d3851a13a4c159ccdbec4dce5d734","049e2a4373be4f81a52be5a71516c526","92efda203d24453a9a12bf051289febf","98f1895dbcd047288b3049b250db4e98","1407f798d5ae4d758109d725772f584b","1673319fc069427288593cb2fefef641","834893cddc7b4518ad9f58de74b9df26","40e030067ccb475cac78dc4a430736ca","aa165a5cc209400fb50c46c5669b4f5a","028e6e2c1a03429b8373796a2d15f967","fe209bc1ebce42139af45e4f0b4ed3c6","03723069ce9b4425a22d248e7aeb78b1","8cce6df0cb3b4d188f8d92e8455bf4a2","6616f6dcdf144fa8932aff1ba15465c9","8bf2d5b027704604af3f73ec14ddb21a","6ab04382062f4119a74bab2d1bae271e","6f6579e6aca445c6ac139fa5ca46a659","352ea4dca3e84a10bc0961f0bc04c8ed","19bce1eac4e448cf8fff65ec32d4bdff","db0467c7229944f6aed12ebe1dcd5fe7","45ed0117c2d1482a922d3922cc8fc955","d2f082ae3be04798bab8442360d8f0df","7dd3d3c4569f4a92afaf3f044a2a6cc8","62f79e70b55545deb23cd27b8f48a5ec","9a9d2950aca641cdac9b87b95f3fb6b4","e14a2798b6b24a3cb6d084266ca295bb","e80750311a3c4c528162e40f5899cadf","bda773c9317c45c3a684191ff207a93c","02592ae9ec374e17bdb94cd63f2da4bd","67fb52d7ed0f447f811f8782ee3ea378","06ce39f472d2434b84146a451937a445","5d05a3d491c24cc98443664571baefdf","ab92784efd82433a980743969c68ad7d","277962f6adf04e8a8ac0b62299c61deb","2db79e59c6e8432fad82d2eb1f3fc7b9","c9389a3714cb4dc08064be1f9205632f","e481844b06824b0bacaf92ce8839a432","d1852a9716694263aea37c369396a1a2","9da8d2763c704661932950c8e9d55050
","d04fd69fbca14c11a843094501a2cf37","a9b0ed8956644ad7945dde85dd827be2","396f842ece3d4b9980926f0bb7ac3ce4","066e7917847e4ae8b122bcd4ada26c62","fa8361afc19c460cb31259401ee38e3e","89c1f8c6c17d4a9792c78ef20daf9ac8","1edf516ba9744c2ea3430a450f11f199","e2f0c8219f404efe82bf10e2fe4519c7","9a0dc2519e464195a25808bacdb42100","3999835733484e52b53d5e498ae53b7d","32acd81ca6df45b6aa8b366de0499587","5296d67b4b0f4cdb850d50f3d7f80e77","b073e64700c94739a22fac4388517029"]},"executionInfo":{"elapsed":334210,"status":"ok","timestamp":1697798094894,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"V1N8r8QVwcCE","outputId":"57ca5f7a-279e-4d2d-99cd-f6da73fafdbb"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n","  and should_run_async(code)\n"]},{"name":"stdout","output_type":"stream","text":["\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n","create a model card and push everything to the hub. It might take up to 1min.\n","This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n"]},{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n","  warnings.warn(\n"]},{"name":"stdout","output_type":"stream","text":["Saving video to /tmp/tmpseb8u7bm/-step-0-to-step-1000.mp4\n","Moviepy - Building video /tmp/tmpseb8u7bm/-step-0-to-step-1000.mp4.\n","Moviepy - Writing video /tmp/tmpseb8u7bm/-step-0-to-step-1000.mp4\n","\n"]},{"name":"stderr","output_type":"stream","text":[]},{"name":"stdout","output_type":"stream","text":["Moviepy - Done !\n","Moviepy - video ready /tmp/tmpseb8u7bm/-step-0-to-step-1000.mp4\n","\u001b[38;5;4mℹ Pushing repo jake-walker/a2c-PandaReachDense-v3 to the Hugging Face\n","Hub\u001b[0m\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"cf3e525363dc4c6c89daa667732a68b8","version_major":2,"version_minor":0},"text/plain":["policy.optimizer.pth:   0%|          | 0.00/45.2k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"bd79e66fd19d45298dc1d6547942cf32","version_major":2,"version_minor":0},"text/plain":["policy.pth:   0%|          | 0.00/46.4k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"aa165a5cc209400fb50c46c5669b4f5a","version_major":2,"version_minor":0},"text/plain":["Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"db0467c7229944f6aed12ebe1dcd5fe7","version_major":2,"version_minor":0},"text/plain":["a2c-PandaReachDense-v3.zip:   0%|          | 0.00/108k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"06ce39f472d2434b84146a451937a445","version_major":2,"version_minor":0},"text/plain":["pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, 
?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"396f842ece3d4b9980926f0bb7ac3ce4","version_major":2,"version_minor":0},"text/plain":["vec_normalize.pkl:   0%|          | 0.00/2.62k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["\u001b[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:\n","https://huggingface.co/jake-walker/a2c-PandaReachDense-v3/tree/main/\u001b[0m\n"]},{"data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'https://huggingface.co/jake-walker/a2c-PandaReachDense-v3/tree/main/'"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["from huggingface_sb3 import package_to_hub\n","\n","package_to_hub(\n","    model=model,\n","    model_name=f\"a2c-{env_id}\",\n","    model_architecture=\"A2C\",\n","    env_id=env_id,\n","    eval_env=eval_env,\n","    repo_id=f\"jake-walker/a2c-{env_id}\", # Change the username\n","    commit_message=\"Initial commit\",\n",")"]},{"cell_type":"markdown","metadata":{"id":"G3xy3Nf3c2O1"},"source":["## Some additional challenges 🏆\n","The best way to learn **is to try things on your own**! Why not try `PandaPickAndPlace-v3`?\n","\n","If you want to try more advanced tasks for panda-gym, you need to check what was done using **TQC or SAC** (a more sample-efficient algorithm suited for robotics tasks). 
In real robotics, you'll use a more sample-efficient algorithm for a simple reason: contrary to a simulation **if you move your robotic arm too much, you have a risk of breaking it**.\n","\n","PandaPickAndPlace-v1 (this model uses the v1 version of the environment): https://huggingface.co/sb3/tqc-PandaPickAndPlace-v1\n","\n","And don't hesitate to check panda-gym documentation here: https://panda-gym.readthedocs.io/en/latest/usage/train_with_sb3.html\n","\n","We provide you the steps to train another agent (optional):\n","\n","1. Define the environment called \"PandaPickAndPlace-v3\"\n","2. Make a vectorized environment\n","3. Add a wrapper to normalize the observations and rewards. [Check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecnormalize)\n","4. Create the A2C Model (don't forget verbose=1 to print the training logs).\n","5. Train it for 1M Timesteps\n","6. Save the model and  VecNormalize statistics when saving the agent\n","7. Evaluate your agent\n","8. 
Publish your trained model on the Hub 🔥 with `package_to_hub`\n"]},{"cell_type":"code","execution_count":13,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":445,"status":"ok","timestamp":1697802723250,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"wh9WotsmqWzJ","outputId":"4d6093c9-9d5b-4652-a39a-5750821a8181"},"outputs":[{"output_type":"stream","name":"stdout","text":["State space: Shape=Dict('achieved_goal': Box(-10.0, 10.0, (3,), float32), 'desired_goal': Box(-10.0, 10.0, (3,), float32), 'observation': Box(-10.0, 10.0, (19,), float32)), Sample=OrderedDict([('achieved_goal', array([-4.42364 ,  3.242822, -8.34239 ], dtype=float32)), ('desired_goal', array([-0.78497666,  7.3610096 , -3.1569993 ], dtype=float32)), ('observation', array([ 2.7180037,  1.2275052,  8.955621 , -1.9594926, -2.1055825,\n","        4.42845  ,  3.8961627,  9.519434 ,  7.919012 , -1.5903567,\n","       -5.681227 , -7.9085526, -3.8844745, -4.5171604,  5.814084 ,\n","        5.5779295,  8.845796 , -0.5498591,  2.4449365], dtype=float32))])\n","Action space: Shape=Box(-1.0, 1.0, (4,), float32), Sample=[-0.7739542   0.9475336  -0.5265528   0.04163277]\n"]}],"source":["env_id = \"PandaPickAndPlace-v3\"\n","\n","env = gym.make(env_id)\n","\n","print(f\"State space: Shape={env.observation_space}, Sample={env.observation_space.sample()}\")\n","print(f\"Action space: Shape={env.action_space}, Sample={env.action_space.sample()}\")"]},{"cell_type":"code","execution_count":14,"metadata":{"executionInfo":{"elapsed":1534,"status":"ok","timestamp":1697802726697,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"K4oxRKADrgWW"},"outputs":[],"source":["env = make_vec_env(env_id, n_envs=4)\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, 
clip_obs=10.)"]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1697802726697,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"PXJuF-1wrvjI","outputId":"096482fa-d379-4734-f471-adde72878472"},"outputs":[{"output_type":"stream","name":"stdout","text":["Using cuda device\n"]}],"source":["model = A2C(policy=\"MultiInputPolicy\", env=env, verbose=1)"]},{"cell_type":"code","execution_count":16,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Y5rTpyGMsAzy","executionInfo":{"status":"ok","timestamp":1697805826543,"user_tz":-60,"elapsed":3099847,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"bf32189e-195d-4bc6-945f-a5f3ec5b3c70"},"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22300    |\n","|    time_elapsed       | 1392     |\n","|    total_timesteps    | 446000   |\n","| train/                |          |\n","|    entropy_loss       | -4.54    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22299    |\n","|    policy_loss        | 0.00738  |\n","|    std                | 0.753    |\n","|    value_loss         | 1.64e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22400    |\n","|    time_elapsed       | 1397     |\n","|    total_timesteps    | 448000   |\n","| train/                |          |\n","|    entropy_loss       | -4.54    |\n","|    
explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22399    |\n","|    policy_loss        | 0.00605  |\n","|    std                | 0.754    |\n","|    value_loss         | 7.36e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22500    |\n","|    time_elapsed       | 1404     |\n","|    total_timesteps    | 450000   |\n","| train/                |          |\n","|    entropy_loss       | -4.52    |\n","|    explained_variance | 0.531    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22499    |\n","|    policy_loss        | -0.00612 |\n","|    std                | 0.75     |\n","|    value_loss         | 3.98e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22600    |\n","|    time_elapsed       | 1410     |\n","|    total_timesteps    | 452000   |\n","| train/                |          |\n","|    entropy_loss       | -4.51    |\n","|    explained_variance | 0.964    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22599    |\n","|    policy_loss        | 0.0125   |\n","|    std                | 0.748    |\n","|    value_loss         | 3.25e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      
|\n","|    iterations         | 22700    |\n","|    time_elapsed       | 1417     |\n","|    total_timesteps    | 454000   |\n","| train/                |          |\n","|    entropy_loss       | -4.52    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22699    |\n","|    policy_loss        | -0.0043  |\n","|    std                | 0.749    |\n","|    value_loss         | 7.3e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22800    |\n","|    time_elapsed       | 1422     |\n","|    total_timesteps    | 456000   |\n","| train/                |          |\n","|    entropy_loss       | -4.5     |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22799    |\n","|    policy_loss        | 0.000382 |\n","|    std                | 0.747    |\n","|    value_loss         | 6.38e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 22900    |\n","|    time_elapsed       | 1430     |\n","|    total_timesteps    | 458000   |\n","| train/                |          |\n","|    entropy_loss       | -4.5     |\n","|    explained_variance | 0.268    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22899    |\n","|    policy_loss        | -0.00389 |\n","|    std                | 0.747    |\n","|    value_loss         | 2.33e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23000    |\n","|    time_elapsed       | 1435     |\n","|    total_timesteps    | 460000   |\n","| train/                |          |\n","|    entropy_loss       | -4.5     |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 22999    |\n","|    policy_loss        | 0.0114   |\n","|    std                | 0.746    |\n","|    value_loss         | 8.51e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23100    |\n","|    time_elapsed       | 1442     |\n","|    total_timesteps    | 462000   |\n","| train/                |          |\n","|    entropy_loss       | -4.51    |\n","|    explained_variance | 0.933    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23099    |\n","|    policy_loss        | 0.00367  |\n","|    std                | 0.747    |\n","|    value_loss         | 1.66e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23200    |\n","|    time_elapsed       | 1448     |\n","|    total_timesteps    | 464000   |\n","| train/                |          |\n","|    entropy_loss       | -4.5     |\n","|    explained_variance | 
0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23199    |\n","|    policy_loss        | 0.00437  |\n","|    std                | 0.746    |\n","|    value_loss         | 1.33e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 50        |\n","|    ep_rew_mean        | -50       |\n","| time/                 |           |\n","|    fps                | 320       |\n","|    iterations         | 23300     |\n","|    time_elapsed       | 1454      |\n","|    total_timesteps    | 466000    |\n","| train/                |           |\n","|    entropy_loss       | -4.51     |\n","|    explained_variance | 0.831     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 23299     |\n","|    policy_loss        | -0.000284 |\n","|    std                | 0.748     |\n","|    value_loss         | 1.05e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.1     |\n","|    ep_rew_mean        | -49.1    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23400    |\n","|    time_elapsed       | 1460     |\n","|    total_timesteps    | 468000   |\n","| train/                |          |\n","|    entropy_loss       | -4.53    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23399    |\n","|    policy_loss        | 0.00391  |\n","|    std                | 0.752    |\n","|    value_loss         | 3.87e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.9     |\n","|    ep_rew_mean        | -47.9    |\n","| time/                 |          |\n","|    fps                | 320      
|\n","|    iterations         | 23500    |\n","|    time_elapsed       | 1467     |\n","|    total_timesteps    | 470000   |\n","| train/                |          |\n","|    entropy_loss       | -4.53    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23499    |\n","|    policy_loss        | -0.0114  |\n","|    std                | 0.752    |\n","|    value_loss         | 8.08e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.3     |\n","|    ep_rew_mean        | -47.3    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23600    |\n","|    time_elapsed       | 1472     |\n","|    total_timesteps    | 472000   |\n","| train/                |          |\n","|    entropy_loss       | -4.51    |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23599    |\n","|    policy_loss        | -0.00803 |\n","|    std                | 0.747    |\n","|    value_loss         | 1.28e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.8     |\n","|    ep_rew_mean        | -47.8    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23700    |\n","|    time_elapsed       | 1478     |\n","|    total_timesteps    | 474000   |\n","| train/                |          |\n","|    entropy_loss       | -4.5     |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23699    |\n","|    policy_loss        | -0.0159  |\n","|    std                | 0.747    |\n","|    value_loss         | 2.18e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23800    |\n","|    time_elapsed       | 1484     |\n","|    total_timesteps    | 476000   |\n","| train/                |          |\n","|    entropy_loss       | -4.47    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23799    |\n","|    policy_loss        | 0.00631  |\n","|    std                | 0.74     |\n","|    value_loss         | 1.77e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 23900    |\n","|    time_elapsed       | 1490     |\n","|    total_timesteps    | 478000   |\n","| train/                |          |\n","|    entropy_loss       | -4.46    |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23899    |\n","|    policy_loss        | 0.0138   |\n","|    std                | 0.738    |\n","|    value_loss         | 1.99e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24000    |\n","|    time_elapsed       | 1496     |\n","|    total_timesteps    | 480000   |\n","| train/                |          |\n","|    entropy_loss       | -4.47    |\n","|    explained_variance | 
0.973    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 23999    |\n","|    policy_loss        | 0.00103  |\n","|    std                | 0.74     |\n","|    value_loss         | 2.38e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24100    |\n","|    time_elapsed       | 1502     |\n","|    total_timesteps    | 482000   |\n","| train/                |          |\n","|    entropy_loss       | -4.46    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24099    |\n","|    policy_loss        | 0.00774  |\n","|    std                | 0.739    |\n","|    value_loss         | 6.57e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24200    |\n","|    time_elapsed       | 1509     |\n","|    total_timesteps    | 484000   |\n","| train/                |          |\n","|    entropy_loss       | -4.49    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24199    |\n","|    policy_loss        | -0.00428 |\n","|    std                | 0.744    |\n","|    value_loss         | 1.34e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations  
       | 24300    |\n","|    time_elapsed       | 1514     |\n","|    total_timesteps    | 486000   |\n","| train/                |          |\n","|    entropy_loss       | -4.46    |\n","|    explained_variance | 0.853    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24299    |\n","|    policy_loss        | -0.0288  |\n","|    std                | 0.739    |\n","|    value_loss         | 0.000101 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24400    |\n","|    time_elapsed       | 1521     |\n","|    total_timesteps    | 488000   |\n","| train/                |          |\n","|    entropy_loss       | -4.44    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24399    |\n","|    policy_loss        | -0.0131  |\n","|    std                | 0.735    |\n","|    value_loss         | 1.31e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24500    |\n","|    time_elapsed       | 1526     |\n","|    total_timesteps    | 490000   |\n","| train/                |          |\n","|    entropy_loss       | -4.43    |\n","|    explained_variance | 0.958    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24499    |\n","|    policy_loss        | -0.00342 |\n","|    std                | 0.734    |\n","|    value_loss         | 1.79e-05 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24600    |\n","|    time_elapsed       | 1533     |\n","|    total_timesteps    | 492000   |\n","| train/                |          |\n","|    entropy_loss       | -4.43    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24599    |\n","|    policy_loss        | 0.00613  |\n","|    std                | 0.733    |\n","|    value_loss         | 2.72e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.7     |\n","|    ep_rew_mean        | -48.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 24700    |\n","|    time_elapsed       | 1538     |\n","|    total_timesteps    | 494000   |\n","| train/                |          |\n","|    entropy_loss       | -4.45    |\n","|    explained_variance | 0.948    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24699    |\n","|    policy_loss        | -0.0112  |\n","|    std                | 0.738    |\n","|    value_loss         | 9.85e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.7     |\n","|    ep_rew_mean        | -48.6    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 24800    |\n","|    time_elapsed       | 1545     |\n","|    total_timesteps    | 496000   |\n","| train/                |          |\n","|    entropy_loss       | -4.46    |\n","|    explained_variance | 0.981    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24799   
 |\n","|    policy_loss        | 0.00358  |\n","|    std                | 0.738    |\n","|    value_loss         | 2.94e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49        |\n","|    ep_rew_mean        | -49       |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 24900     |\n","|    time_elapsed       | 1551      |\n","|    total_timesteps    | 498000    |\n","| train/                |           |\n","|    entropy_loss       | -4.44     |\n","|    explained_variance | 0.993     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 24899     |\n","|    policy_loss        | -0.000853 |\n","|    std                | 0.736     |\n","|    value_loss         | 5.11e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.6     |\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25000    |\n","|    time_elapsed       | 1557     |\n","|    total_timesteps    | 500000   |\n","| train/                |          |\n","|    entropy_loss       | -4.44    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 24999    |\n","|    policy_loss        | -0.00074 |\n","|    std                | 0.734    |\n","|    value_loss         | 8.6e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.2     |\n","|    ep_rew_mean        | -47.1    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25100    |\n","|    time_elapsed       | 1563     |\n","|    
total_timesteps    | 502000   |\n","| train/                |          |\n","|    entropy_loss       | -4.44    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25099    |\n","|    policy_loss        | -0.00915 |\n","|    std                | 0.735    |\n","|    value_loss         | 1.01e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25200    |\n","|    time_elapsed       | 1568     |\n","|    total_timesteps    | 504000   |\n","| train/                |          |\n","|    entropy_loss       | -4.42    |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25199    |\n","|    policy_loss        | -0.0012  |\n","|    std                | 0.732    |\n","|    value_loss         | 1.22e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.6     |\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25300    |\n","|    time_elapsed       | 1575     |\n","|    total_timesteps    | 506000   |\n","| train/                |          |\n","|    entropy_loss       | -4.43    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25299    |\n","|    policy_loss        | -0.00306 |\n","|    std                | 0.732    |\n","|    value_loss         | 6.56e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.6     
|\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25400    |\n","|    time_elapsed       | 1581     |\n","|    total_timesteps    | 508000   |\n","| train/                |          |\n","|    entropy_loss       | -4.42    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25399    |\n","|    policy_loss        | 0.0013   |\n","|    std                | 0.732    |\n","|    value_loss         | 5.06e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25500    |\n","|    time_elapsed       | 1587     |\n","|    total_timesteps    | 510000   |\n","| train/                |          |\n","|    entropy_loss       | -4.4     |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25499    |\n","|    policy_loss        | 0.00627  |\n","|    std                | 0.728    |\n","|    value_loss         | 5.71e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25600    |\n","|    time_elapsed       | 1593     |\n","|    total_timesteps    | 512000   |\n","| train/                |          |\n","|    entropy_loss       | -4.41    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25599    |\n","|    policy_loss        | -0.00841 |\n","|    std                | 
0.729    |\n","|    value_loss         | 1.27e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25700    |\n","|    time_elapsed       | 1599     |\n","|    total_timesteps    | 514000   |\n","| train/                |          |\n","|    entropy_loss       | -4.41    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25699    |\n","|    policy_loss        | 0.00429  |\n","|    std                | 0.729    |\n","|    value_loss         | 6.22e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25800    |\n","|    time_elapsed       | 1605     |\n","|    total_timesteps    | 516000   |\n","| train/                |          |\n","|    entropy_loss       | -4.4     |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25799    |\n","|    policy_loss        | 0.00246  |\n","|    std                | 0.729    |\n","|    value_loss         | 2.96e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 25900    |\n","|    time_elapsed       | 1611     |\n","|    total_timesteps    | 518000   |\n","| train/                |          |\n","|    
entropy_loss       | -4.41    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25899    |\n","|    policy_loss        | -0.00309 |\n","|    std                | 0.73     |\n","|    value_loss         | 1.62e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26000    |\n","|    time_elapsed       | 1617     |\n","|    total_timesteps    | 520000   |\n","| train/                |          |\n","|    entropy_loss       | -4.39    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 25999    |\n","|    policy_loss        | 0.00213  |\n","|    std                | 0.726    |\n","|    value_loss         | 2.51e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26100    |\n","|    time_elapsed       | 1623     |\n","|    total_timesteps    | 522000   |\n","| train/                |          |\n","|    entropy_loss       | -4.39    |\n","|    explained_variance | 0.00436  |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26099    |\n","|    policy_loss        | 1.32     |\n","|    std                | 0.722    |\n","|    value_loss         | 3.88     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          
|\n","|    fps                | 321      |\n","|    iterations         | 26200    |\n","|    time_elapsed       | 1629     |\n","|    total_timesteps    | 524000   |\n","| train/                |          |\n","|    entropy_loss       | -4.37    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26199    |\n","|    policy_loss        | 0.00276  |\n","|    std                | 0.722    |\n","|    value_loss         | 1.34e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26300    |\n","|    time_elapsed       | 1635     |\n","|    total_timesteps    | 526000   |\n","| train/                |          |\n","|    entropy_loss       | -4.36    |\n","|    explained_variance | 0.95     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26299    |\n","|    policy_loss        | 0.00177  |\n","|    std                | 0.72     |\n","|    value_loss         | 1.24e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26400    |\n","|    time_elapsed       | 1641     |\n","|    total_timesteps    | 528000   |\n","| train/                |          |\n","|    entropy_loss       | -4.38    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26399    |\n","|    policy_loss        | 0.00935  |\n","|    std                | 0.723    |\n","|    value_loss         | 1.51e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26500    |\n","|    time_elapsed       | 1646     |\n","|    total_timesteps    | 530000   |\n","| train/                |          |\n","|    entropy_loss       | -4.37    |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26499    |\n","|    policy_loss        | -0.00284 |\n","|    std                | 0.722    |\n","|    value_loss         | 4.73e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26600    |\n","|    time_elapsed       | 1653     |\n","|    total_timesteps    | 532000   |\n","| train/                |          |\n","|    entropy_loss       | -4.36    |\n","|    explained_variance | 0.845    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26599    |\n","|    policy_loss        | -0.00662 |\n","|    std                | 0.72     |\n","|    value_loss         | 4.52e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26700    |\n","|    time_elapsed       | 1659     |\n","|    total_timesteps    | 534000   |\n","| train/                |          |\n","|    entropy_loss       | -4.35    |\n","|    explained_variance | 
0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26699    |\n","|    policy_loss        | 0.0012   |\n","|    std                | 0.719    |\n","|    value_loss         | 2.09e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26800    |\n","|    time_elapsed       | 1666     |\n","|    total_timesteps    | 536000   |\n","| train/                |          |\n","|    entropy_loss       | -4.33    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26799    |\n","|    policy_loss        | -0.00135 |\n","|    std                | 0.715    |\n","|    value_loss         | 5.19e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 26900    |\n","|    time_elapsed       | 1671     |\n","|    total_timesteps    | 538000   |\n","| train/                |          |\n","|    entropy_loss       | -4.33    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26899    |\n","|    policy_loss        | 0.000111 |\n","|    std                | 0.716    |\n","|    value_loss         | 1.34e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations  
       | 27000    |\n","|    time_elapsed       | 1678     |\n","|    total_timesteps    | 540000   |\n","| train/                |          |\n","|    entropy_loss       | -4.34    |\n","|    explained_variance | 0.941    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 26999    |\n","|    policy_loss        | -0.0281  |\n","|    std                | 0.716    |\n","|    value_loss         | 0.000137 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27100    |\n","|    time_elapsed       | 1684     |\n","|    total_timesteps    | 542000   |\n","| train/                |          |\n","|    entropy_loss       | -4.32    |\n","|    explained_variance | 0.903    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27099    |\n","|    policy_loss        | 0.00104  |\n","|    std                | 0.714    |\n","|    value_loss         | 6.46e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27200    |\n","|    time_elapsed       | 1690     |\n","|    total_timesteps    | 544000   |\n","| train/                |          |\n","|    entropy_loss       | -4.32    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27199    |\n","|    policy_loss        | -0.00936 |\n","|    std                | 0.713    |\n","|    value_loss         | 9.5e-06  |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27300    |\n","|    time_elapsed       | 1696     |\n","|    total_timesteps    | 546000   |\n","| train/                |          |\n","|    entropy_loss       | -4.31    |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27299    |\n","|    policy_loss        | 0.00152  |\n","|    std                | 0.712    |\n","|    value_loss         | 1.94e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49        |\n","|    ep_rew_mean        | -49       |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 27400     |\n","|    time_elapsed       | 1703      |\n","|    total_timesteps    | 548000    |\n","| train/                |           |\n","|    entropy_loss       | -4.31     |\n","|    explained_variance | 0.995     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 27399     |\n","|    policy_loss        | -0.000549 |\n","|    std                | 0.711     |\n","|    value_loss         | 7.89e-07  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27500    |\n","|    time_elapsed       | 1709     |\n","|    total_timesteps    | 550000   |\n","| train/                |          |\n","|    entropy_loss       | -4.3     |\n","|    explained_variance | 0.921    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates  
        | 27499    |\n","|    policy_loss        | 0.00315  |\n","|    std                | 0.71     |\n","|    value_loss         | 2.35e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27600    |\n","|    time_elapsed       | 1715     |\n","|    total_timesteps    | 552000   |\n","| train/                |          |\n","|    entropy_loss       | -4.29    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27599    |\n","|    policy_loss        | -0.00397 |\n","|    std                | 0.707    |\n","|    value_loss         | 3.03e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.1     |\n","|    ep_rew_mean        | -46      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27700    |\n","|    time_elapsed       | 1721     |\n","|    total_timesteps    | 554000   |\n","| train/                |          |\n","|    entropy_loss       | -4.29    |\n","|    explained_variance | 0.953    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27699    |\n","|    policy_loss        | -0.00256 |\n","|    std                | 0.707    |\n","|    value_loss         | 5.3e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27800    |\n","|    time_elapsed       | 1727     |\n","|    
total_timesteps    | 556000   |\n","| train/                |          |\n","|    entropy_loss       | -4.28    |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27799    |\n","|    policy_loss        | 0.004    |\n","|    std                | 0.707    |\n","|    value_loss         | 7.64e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 27900    |\n","|    time_elapsed       | 1733     |\n","|    total_timesteps    | 558000   |\n","| train/                |          |\n","|    entropy_loss       | -4.28    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27899    |\n","|    policy_loss        | -0.00315 |\n","|    std                | 0.706    |\n","|    value_loss         | 3.17e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28000    |\n","|    time_elapsed       | 1739     |\n","|    total_timesteps    | 560000   |\n","| train/                |          |\n","|    entropy_loss       | -4.27    |\n","|    explained_variance | 0.922    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 27999    |\n","|    policy_loss        | 0.013    |\n","|    std                | 0.705    |\n","|    value_loss         | 5.23e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       
|\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28100    |\n","|    time_elapsed       | 1746     |\n","|    total_timesteps    | 562000   |\n","| train/                |          |\n","|    entropy_loss       | -4.27    |\n","|    explained_variance | 0.968    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28099    |\n","|    policy_loss        | 0.0028   |\n","|    std                | 0.704    |\n","|    value_loss         | 2.9e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28200    |\n","|    time_elapsed       | 1751     |\n","|    total_timesteps    | 564000   |\n","| train/                |          |\n","|    entropy_loss       | -4.26    |\n","|    explained_variance | 0.786    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28199    |\n","|    policy_loss        | 0.00103  |\n","|    std                | 0.703    |\n","|    value_loss         | 1.06e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28300    |\n","|    time_elapsed       | 1758     |\n","|    total_timesteps    | 566000   |\n","| train/                |          |\n","|    entropy_loss       | -4.27    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28299    |\n","|    policy_loss        | 0.000481 |\n","|    std                | 
0.704    |\n","|    value_loss         | 4.7e-07  |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 50        |\n","|    ep_rew_mean        | -50       |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 28400     |\n","|    time_elapsed       | 1764      |\n","|    total_timesteps    | 568000    |\n","| train/                |           |\n","|    entropy_loss       | -4.27     |\n","|    explained_variance | 0.904     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 28399     |\n","|    policy_loss        | -0.000826 |\n","|    std                | 0.704     |\n","|    value_loss         | 9.83e-07  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28500    |\n","|    time_elapsed       | 1771     |\n","|    total_timesteps    | 570000   |\n","| train/                |          |\n","|    entropy_loss       | -4.26    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28499    |\n","|    policy_loss        | 0.00127  |\n","|    std                | 0.703    |\n","|    value_loss         | 4.86e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28600    |\n","|    time_elapsed       | 1776     |\n","|    total_timesteps    | 572000   |\n","| train/                |          
|\n","|    entropy_loss       | -4.26    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28599    |\n","|    policy_loss        | 0.00524  |\n","|    std                | 0.702    |\n","|    value_loss         | 2.94e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 28700    |\n","|    time_elapsed       | 1783     |\n","|    total_timesteps    | 574000   |\n","| train/                |          |\n","|    entropy_loss       | -4.25    |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28699    |\n","|    policy_loss        | -0.00227 |\n","|    std                | 0.702    |\n","|    value_loss         | 2.17e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.3     |\n","|    ep_rew_mean        | -48.2    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 28800    |\n","|    time_elapsed       | 1788     |\n","|    total_timesteps    | 576000   |\n","| train/                |          |\n","|    entropy_loss       | -4.23    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28799    |\n","|    policy_loss        | -0.00261 |\n","|    std                | 0.697    |\n","|    value_loss         | 4.74e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.3      |\n","|    ep_rew_mean        | -48.2     |\n","| time/               
  |           |\n","|    fps                | 321       |\n","|    iterations         | 28900     |\n","|    time_elapsed       | 1795      |\n","|    total_timesteps    | 578000    |\n","| train/                |           |\n","|    entropy_loss       | -4.2      |\n","|    explained_variance | 0.979     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 28899     |\n","|    policy_loss        | -0.000543 |\n","|    std                | 0.693     |\n","|    value_loss         | 1.13e-05  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 29000    |\n","|    time_elapsed       | 1801     |\n","|    total_timesteps    | 580000   |\n","| train/                |          |\n","|    entropy_loss       | -4.2     |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 28999    |\n","|    policy_loss        | 0.00182  |\n","|    std                | 0.692    |\n","|    value_loss         | 2.31e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 29100    |\n","|    time_elapsed       | 1806     |\n","|    total_timesteps    | 582000   |\n","| train/                |          |\n","|    entropy_loss       | -4.18    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29099    |\n","|    policy_loss        | -0.00155 |\n","|    std                | 0.689    |\n","|    value_loss         | 1.3e-06  
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 29200    |\n","|    time_elapsed       | 1813     |\n","|    total_timesteps    | 584000   |\n","| train/                |          |\n","|    entropy_loss       | -4.16    |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29199    |\n","|    policy_loss        | -0.00291 |\n","|    std                | 0.686    |\n","|    value_loss         | 2.25e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 29300    |\n","|    time_elapsed       | 1820     |\n","|    total_timesteps    | 586000   |\n","| train/                |          |\n","|    entropy_loss       | -4.15    |\n","|    explained_variance | 0.872    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29299    |\n","|    policy_loss        | -0.00144 |\n","|    std                | 0.684    |\n","|    value_loss         | 4.58e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 29400    |\n","|    time_elapsed       | 1826     |\n","|    total_timesteps    | 588000   |\n","| train/                |          |\n","|    entropy_loss       | -4.15    |\n","|    explained_variance | 
0.948    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29399    |\n","|    policy_loss        | -0.00125 |\n","|    std                | 0.684    |\n","|    value_loss         | 1.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.1     |\n","|    ep_rew_mean        | -49.1    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 29500    |\n","|    time_elapsed       | 1832     |\n","|    total_timesteps    | 590000   |\n","| train/                |          |\n","|    entropy_loss       | -4.13    |\n","|    explained_variance | 0.903    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29499    |\n","|    policy_loss        | -0.0435  |\n","|    std                | 0.681    |\n","|    value_loss         | 0.000123 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.6     |\n","|    ep_rew_mean        | -49.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 29600    |\n","|    time_elapsed       | 1839     |\n","|    total_timesteps    | 592000   |\n","| train/                |          |\n","|    entropy_loss       | -4.13    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29599    |\n","|    policy_loss        | -0.00305 |\n","|    std                | 0.681    |\n","|    value_loss         | 2.86e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.1     |\n","|    ep_rew_mean        | -49.1    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations  
       | 29700    |\n","|    time_elapsed       | 1844     |\n","|    total_timesteps    | 594000   |\n","| train/                |          |\n","|    entropy_loss       | -4.12    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29699    |\n","|    policy_loss        | -0.00074 |\n","|    std                | 0.679    |\n","|    value_loss         | 1.14e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 29800    |\n","|    time_elapsed       | 1851     |\n","|    total_timesteps    | 596000   |\n","| train/                |          |\n","|    entropy_loss       | -4.12    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29799    |\n","|    policy_loss        | -0.00177 |\n","|    std                | 0.68     |\n","|    value_loss         | 8.38e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 29900    |\n","|    time_elapsed       | 1857     |\n","|    total_timesteps    | 598000   |\n","| train/                |          |\n","|    entropy_loss       | -4.1     |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29899    |\n","|    policy_loss        | 0.004    |\n","|    std                | 0.675    |\n","|    value_loss         | 2.33e-06 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30000    |\n","|    time_elapsed       | 1863     |\n","|    total_timesteps    | 600000   |\n","| train/                |          |\n","|    entropy_loss       | -4.08    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 29999    |\n","|    policy_loss        | 0.000335 |\n","|    std                | 0.673    |\n","|    value_loss         | 3.32e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 30100    |\n","|    time_elapsed       | 1869     |\n","|    total_timesteps    | 602000   |\n","| train/                |          |\n","|    entropy_loss       | -4.07    |\n","|    explained_variance | 0.00489  |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30099    |\n","|    policy_loss        | 6.22     |\n","|    std                | 0.672    |\n","|    value_loss         | 7.77     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30200    |\n","|    time_elapsed       | 1876     |\n","|    total_timesteps    | 604000   |\n","| train/                |          |\n","|    entropy_loss       | -4.07    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30199   
 |\n","|    policy_loss        | -0.00944 |\n","|    std                | 0.671    |\n","|    value_loss         | 1.09e-05 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48        |\n","|    ep_rew_mean        | -48       |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 30300     |\n","|    time_elapsed       | 1882      |\n","|    total_timesteps    | 606000    |\n","| train/                |           |\n","|    entropy_loss       | -4.07     |\n","|    explained_variance | 0.992     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 30299     |\n","|    policy_loss        | -0.000147 |\n","|    std                | 0.67      |\n","|    value_loss         | 2.51e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30400    |\n","|    time_elapsed       | 1888     |\n","|    total_timesteps    | 608000   |\n","| train/                |          |\n","|    entropy_loss       | -4.07    |\n","|    explained_variance | 0.968    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30399    |\n","|    policy_loss        | 0.00494  |\n","|    std                | 0.67     |\n","|    value_loss         | 3.23e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30500    |\n","|    time_elapsed       | 1894     |\n","|    
total_timesteps    | 610000   |\n","| train/                |          |\n","|    entropy_loss       | -4.05    |\n","|    explained_variance | 0.0129   |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30499    |\n","|    policy_loss        | 1.49     |\n","|    std                | 0.666    |\n","|    value_loss         | 3.84     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30600    |\n","|    time_elapsed       | 1901     |\n","|    total_timesteps    | 612000   |\n","| train/                |          |\n","|    entropy_loss       | -4.04    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30599    |\n","|    policy_loss        | -0.00605 |\n","|    std                | 0.665    |\n","|    value_loss         | 8.73e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30700    |\n","|    time_elapsed       | 1907     |\n","|    total_timesteps    | 614000   |\n","| train/                |          |\n","|    entropy_loss       | -4.03    |\n","|    explained_variance | 0.903    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30699    |\n","|    policy_loss        | -0.00291 |\n","|    std                | 0.663    |\n","|    value_loss         | 3.43e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     
|\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30800    |\n","|    time_elapsed       | 1913     |\n","|    total_timesteps    | 616000   |\n","| train/                |          |\n","|    entropy_loss       | -4.02    |\n","|    explained_variance | 0.964    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30799    |\n","|    policy_loss        | 0.00122  |\n","|    std                | 0.662    |\n","|    value_loss         | 1.18e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 30900    |\n","|    time_elapsed       | 1919     |\n","|    total_timesteps    | 618000   |\n","| train/                |          |\n","|    entropy_loss       | -4       |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30899    |\n","|    policy_loss        | -0.00202 |\n","|    std                | 0.658    |\n","|    value_loss         | 5.54e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31000    |\n","|    time_elapsed       | 1924     |\n","|    total_timesteps    | 620000   |\n","| train/                |          |\n","|    entropy_loss       | -3.99    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 30999    |\n","|    policy_loss        | 0.00142  |\n","|    std                | 
0.657    |\n","|    value_loss         | 1.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 31100    |\n","|    time_elapsed       | 1931     |\n","|    total_timesteps    | 622000   |\n","| train/                |          |\n","|    entropy_loss       | -3.99    |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31099    |\n","|    policy_loss        | 0.000119 |\n","|    std                | 0.657    |\n","|    value_loss         | 8.93e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31200    |\n","|    time_elapsed       | 1937     |\n","|    total_timesteps    | 624000   |\n","| train/                |          |\n","|    entropy_loss       | -3.99    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31199    |\n","|    policy_loss        | 0.00198  |\n","|    std                | 0.657    |\n","|    value_loss         | 1.63e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31300    |\n","|    time_elapsed       | 1944     |\n","|    total_timesteps    | 626000   |\n","| train/                |          |\n","|    
entropy_loss       | -3.94    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31299    |\n","|    policy_loss        | -0.00346 |\n","|    std                | 0.65     |\n","|    value_loss         | 1.58e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31400    |\n","|    time_elapsed       | 1949     |\n","|    total_timesteps    | 628000   |\n","| train/                |          |\n","|    entropy_loss       | -3.95    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31399    |\n","|    policy_loss        | 9.44e-06 |\n","|    std                | 0.65     |\n","|    value_loss         | 5.46e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 31500    |\n","|    time_elapsed       | 1956     |\n","|    total_timesteps    | 630000   |\n","| train/                |          |\n","|    entropy_loss       | -3.94    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31499    |\n","|    policy_loss        | 0.0104   |\n","|    std                | 0.649    |\n","|    value_loss         | 1.78e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          
|\n","|    fps                | 322      |\n","|    iterations         | 31600    |\n","|    time_elapsed       | 1962     |\n","|    total_timesteps    | 632000   |\n","| train/                |          |\n","|    entropy_loss       | -3.93    |\n","|    explained_variance | 0.955    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31599    |\n","|    policy_loss        | 0.00218  |\n","|    std                | 0.647    |\n","|    value_loss         | 4.3e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 31700    |\n","|    time_elapsed       | 1968     |\n","|    total_timesteps    | 634000   |\n","| train/                |          |\n","|    entropy_loss       | -3.91    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31699    |\n","|    policy_loss        | 0.000514 |\n","|    std                | 0.645    |\n","|    value_loss         | 6.02e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31800    |\n","|    time_elapsed       | 1974     |\n","|    total_timesteps    | 636000   |\n","| train/                |          |\n","|    entropy_loss       | -3.9     |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31799    |\n","|    policy_loss        | 0.00938  |\n","|    std                | 0.643    |\n","|    value_loss         | 9.18e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 31900    |\n","|    time_elapsed       | 1981     |\n","|    total_timesteps    | 638000   |\n","| train/                |          |\n","|    entropy_loss       | -3.89    |\n","|    explained_variance | 0.95     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31899    |\n","|    policy_loss        | -0.0213  |\n","|    std                | 0.642    |\n","|    value_loss         | 0.000101 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32000    |\n","|    time_elapsed       | 1986     |\n","|    total_timesteps    | 640000   |\n","| train/                |          |\n","|    entropy_loss       | -3.9     |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 31999    |\n","|    policy_loss        | 0.00213  |\n","|    std                | 0.642    |\n","|    value_loss         | 3.42e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32100    |\n","|    time_elapsed       | 1992     |\n","|    total_timesteps    | 642000   |\n","| train/                |          |\n","|    entropy_loss       | -3.91    |\n","|    explained_variance | 
0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32099    |\n","|    policy_loss        | 0.0115   |\n","|    std                | 0.644    |\n","|    value_loss         | 1.85e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32200    |\n","|    time_elapsed       | 1999     |\n","|    total_timesteps    | 644000   |\n","| train/                |          |\n","|    entropy_loss       | -3.92    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32199    |\n","|    policy_loss        | -0.00667 |\n","|    std                | 0.646    |\n","|    value_loss         | 8.71e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32300    |\n","|    time_elapsed       | 2005     |\n","|    total_timesteps    | 646000   |\n","| train/                |          |\n","|    entropy_loss       | -3.93    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32299    |\n","|    policy_loss        | 0.000976 |\n","|    std                | 0.648    |\n","|    value_loss         | 2.54e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations  
       | 32400    |\n","|    time_elapsed       | 2011     |\n","|    total_timesteps    | 648000   |\n","| train/                |          |\n","|    entropy_loss       | -3.89    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32399    |\n","|    policy_loss        | -0.0256  |\n","|    std                | 0.642    |\n","|    value_loss         | 5.63e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32500    |\n","|    time_elapsed       | 2017     |\n","|    total_timesteps    | 650000   |\n","| train/                |          |\n","|    entropy_loss       | -3.89    |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32499    |\n","|    policy_loss        | -0.00328 |\n","|    std                | 0.641    |\n","|    value_loss         | 4.72e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32600    |\n","|    time_elapsed       | 2024     |\n","|    total_timesteps    | 652000   |\n","| train/                |          |\n","|    entropy_loss       | -3.88    |\n","|    explained_variance | 0.957    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32599    |\n","|    policy_loss        | -0.00627 |\n","|    std                | 0.64     |\n","|    value_loss         | 1.47e-05 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32700    |\n","|    time_elapsed       | 2029     |\n","|    total_timesteps    | 654000   |\n","| train/                |          |\n","|    entropy_loss       | -3.88    |\n","|    explained_variance | 0.063    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32699    |\n","|    policy_loss        | -0.00859 |\n","|    std                | 0.64     |\n","|    value_loss         | 6.27e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32800    |\n","|    time_elapsed       | 2036     |\n","|    total_timesteps    | 656000   |\n","| train/                |          |\n","|    entropy_loss       | -3.89    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32799    |\n","|    policy_loss        | -0.00293 |\n","|    std                | 0.641    |\n","|    value_loss         | 8.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 32900    |\n","|    time_elapsed       | 2042     |\n","|    total_timesteps    | 658000   |\n","| train/                |          |\n","|    entropy_loss       | -3.89    |\n","|    explained_variance | 0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 32899   
 |\n","|    policy_loss        | -0.00592 |\n","|    std                | 0.642    |\n","|    value_loss         | 4.93e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48        |\n","|    ep_rew_mean        | -48       |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 33000     |\n","|    time_elapsed       | 2049      |\n","|    total_timesteps    | 660000    |\n","| train/                |           |\n","|    entropy_loss       | -3.9      |\n","|    explained_variance | 0.943     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 32999     |\n","|    policy_loss        | -0.000123 |\n","|    std                | 0.643     |\n","|    value_loss         | 1.64e-05  |\n","-------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.4      |\n","|    ep_rew_mean        | -48.3     |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 33100     |\n","|    time_elapsed       | 2055      |\n","|    total_timesteps    | 662000    |\n","| train/                |           |\n","|    entropy_loss       | -3.92     |\n","|    explained_variance | 0.998     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 33099     |\n","|    policy_loss        | -0.000744 |\n","|    std                | 0.647     |\n","|    value_loss         | 8.69e-07  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.4     |\n","|    ep_rew_mean        | -48.3    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33200    |\n","|    time_elapsed       | 
2062     |\n","|    total_timesteps    | 664000   |\n","| train/                |          |\n","|    entropy_loss       | -3.9     |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33199    |\n","|    policy_loss        | 0.000583 |\n","|    std                | 0.644    |\n","|    value_loss         | 2.82e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.9     |\n","|    ep_rew_mean        | -47.8    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 33300    |\n","|    time_elapsed       | 2067     |\n","|    total_timesteps    | 666000   |\n","| train/                |          |\n","|    entropy_loss       | -3.86    |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33299    |\n","|    policy_loss        | -0.00895 |\n","|    std                | 0.637    |\n","|    value_loss         | 9.05e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33400    |\n","|    time_elapsed       | 2074     |\n","|    total_timesteps    | 668000   |\n","| train/                |          |\n","|    entropy_loss       | -3.87    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33399    |\n","|    policy_loss        | 0.00113  |\n","|    std                | 0.638    |\n","|    value_loss         | 2.54e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean 
       | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 33500    |\n","|    time_elapsed       | 2080     |\n","|    total_timesteps    | 670000   |\n","| train/                |          |\n","|    entropy_loss       | -3.87    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33499    |\n","|    policy_loss        | 0.000191 |\n","|    std                | 0.639    |\n","|    value_loss         | 8.6e-07  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33600    |\n","|    time_elapsed       | 2087     |\n","|    total_timesteps    | 672000   |\n","| train/                |          |\n","|    entropy_loss       | -3.88    |\n","|    explained_variance | -1.12    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33599    |\n","|    policy_loss        | 0.0139   |\n","|    std                | 0.64     |\n","|    value_loss         | 0.000148 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.4     |\n","|    ep_rew_mean        | -48.4    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33700    |\n","|    time_elapsed       | 2093     |\n","|    total_timesteps    | 674000   |\n","| train/                |          |\n","|    entropy_loss       | -3.88    |\n","|    explained_variance | 0.23     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33699    |\n","|    policy_loss        | -0.0335  |\n","|    
std                | 0.639    |\n","|    value_loss         | 0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.9     |\n","|    ep_rew_mean        | -48.9    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33800    |\n","|    time_elapsed       | 2100     |\n","|    total_timesteps    | 676000   |\n","| train/                |          |\n","|    entropy_loss       | -3.86    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33799    |\n","|    policy_loss        | 0.000593 |\n","|    std                | 0.636    |\n","|    value_loss         | 5.22e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.4     |\n","|    ep_rew_mean        | -48.4    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 33900    |\n","|    time_elapsed       | 2105     |\n","|    total_timesteps    | 678000   |\n","| train/                |          |\n","|    entropy_loss       | -3.83    |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33899    |\n","|    policy_loss        | 0.0128   |\n","|    std                | 0.632    |\n","|    value_loss         | 1.26e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34000    |\n","|    time_elapsed       | 2112     |\n","|    total_timesteps    | 680000   |\n","| train/                |          
|\n","|    entropy_loss       | -3.84    |\n","|    explained_variance | 0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 33999    |\n","|    policy_loss        | 0.00097  |\n","|    std                | 0.633    |\n","|    value_loss         | 3.36e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34100    |\n","|    time_elapsed       | 2118     |\n","|    total_timesteps    | 682000   |\n","| train/                |          |\n","|    entropy_loss       | -3.83    |\n","|    explained_variance | 0.978    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34099    |\n","|    policy_loss        | 0.00801  |\n","|    std                | 0.632    |\n","|    value_loss         | 9.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34200    |\n","|    time_elapsed       | 2124     |\n","|    total_timesteps    | 684000   |\n","| train/                |          |\n","|    entropy_loss       | -3.83    |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34199    |\n","|    policy_loss        | -0.00194 |\n","|    std                | 0.633    |\n","|    value_loss         | 3.13e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 | 
         |\n","|    fps                | 321      |\n","|    iterations         | 34300    |\n","|    time_elapsed       | 2131     |\n","|    total_timesteps    | 686000   |\n","| train/                |          |\n","|    entropy_loss       | -3.84    |\n","|    explained_variance | 0.956    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34299    |\n","|    policy_loss        | 0.000892 |\n","|    std                | 0.633    |\n","|    value_loss         | 3.67e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34400    |\n","|    time_elapsed       | 2137     |\n","|    total_timesteps    | 688000   |\n","| train/                |          |\n","|    entropy_loss       | -3.82    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34399    |\n","|    policy_loss        | -0.00746 |\n","|    std                | 0.63     |\n","|    value_loss         | 7.19e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34500    |\n","|    time_elapsed       | 2143     |\n","|    total_timesteps    | 690000   |\n","| train/                |          |\n","|    entropy_loss       | -3.81    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34499    |\n","|    policy_loss        | 0.00125  |\n","|    std                | 0.629    |\n","|    value_loss         | 1.54e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34600    |\n","|    time_elapsed       | 2149     |\n","|    total_timesteps    | 692000   |\n","| train/                |          |\n","|    entropy_loss       | -3.8     |\n","|    explained_variance | 0.902    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34599    |\n","|    policy_loss        | 0.000892 |\n","|    std                | 0.627    |\n","|    value_loss         | 2.25e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34700    |\n","|    time_elapsed       | 2156     |\n","|    total_timesteps    | 694000   |\n","| train/                |          |\n","|    entropy_loss       | -3.8     |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34699    |\n","|    policy_loss        | -0.00013 |\n","|    std                | 0.627    |\n","|    value_loss         | 5.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34800    |\n","|    time_elapsed       | 2162     |\n","|    total_timesteps    | 696000   |\n","| train/                |          |\n","|    entropy_loss       | -3.77    |\n","|    explained_variance | 
0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34799    |\n","|    policy_loss        | 0.00223  |\n","|    std                | 0.622    |\n","|    value_loss         | 2.36e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.2     |\n","|    ep_rew_mean        | -48.2    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 34900    |\n","|    time_elapsed       | 2168     |\n","|    total_timesteps    | 698000   |\n","| train/                |          |\n","|    entropy_loss       | -3.77    |\n","|    explained_variance | 0.852    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34899    |\n","|    policy_loss        | 0.00392  |\n","|    std                | 0.622    |\n","|    value_loss         | 6.95e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.2     |\n","|    ep_rew_mean        | -48.2    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35000    |\n","|    time_elapsed       | 2174     |\n","|    total_timesteps    | 700000   |\n","| train/                |          |\n","|    entropy_loss       | -3.76    |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 34999    |\n","|    policy_loss        | -0.00224 |\n","|    std                | 0.621    |\n","|    value_loss         | 2.23e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations  
       | 35100    |\n","|    time_elapsed       | 2181     |\n","|    total_timesteps    | 702000   |\n","| train/                |          |\n","|    entropy_loss       | -3.77    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35099    |\n","|    policy_loss        | -0.00306 |\n","|    std                | 0.622    |\n","|    value_loss         | 4.37e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.4     |\n","|    ep_rew_mean        | -48.4    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35200    |\n","|    time_elapsed       | 2186     |\n","|    total_timesteps    | 704000   |\n","| train/                |          |\n","|    entropy_loss       | -3.76    |\n","|    explained_variance | 0.944    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35199    |\n","|    policy_loss        | -0.011   |\n","|    std                | 0.622    |\n","|    value_loss         | 1.77e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.8     |\n","|    ep_rew_mean        | -47.7    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35300    |\n","|    time_elapsed       | 2193     |\n","|    total_timesteps    | 706000   |\n","| train/                |          |\n","|    entropy_loss       | -3.74    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35299    |\n","|    policy_loss        | 0.000581 |\n","|    std                | 0.619    |\n","|    value_loss         | 1.22e-06 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 47.8     |\n","|    ep_rew_mean        | -47.7    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35400    |\n","|    time_elapsed       | 2199     |\n","|    total_timesteps    | 708000   |\n","| train/                |          |\n","|    entropy_loss       | -3.75    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35399    |\n","|    policy_loss        | -0.00404 |\n","|    std                | 0.619    |\n","|    value_loss         | 5.11e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35500    |\n","|    time_elapsed       | 2206     |\n","|    total_timesteps    | 710000   |\n","| train/                |          |\n","|    entropy_loss       | -3.75    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35499    |\n","|    policy_loss        | -0.00227 |\n","|    std                | 0.619    |\n","|    value_loss         | 6.73e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35600    |\n","|    time_elapsed       | 2212     |\n","|    total_timesteps    | 712000   |\n","| train/                |          |\n","|    entropy_loss       | -3.74    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35599   
 |\n","|    policy_loss        | -0.00401 |\n","|    std                | 0.618    |\n","|    value_loss         | 1.07e-05 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49.5      |\n","|    ep_rew_mean        | -49.5     |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 35700     |\n","|    time_elapsed       | 2219      |\n","|    total_timesteps    | 714000    |\n","| train/                |           |\n","|    entropy_loss       | -3.73     |\n","|    explained_variance | 0.981     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 35699     |\n","|    policy_loss        | -0.000242 |\n","|    std                | 0.616     |\n","|    value_loss         | 1.11e-05  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35800    |\n","|    time_elapsed       | 2225     |\n","|    total_timesteps    | 716000   |\n","| train/                |          |\n","|    entropy_loss       | -3.73    |\n","|    explained_variance | 0.961    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35799    |\n","|    policy_loss        | -0.00604 |\n","|    std                | 0.617    |\n","|    value_loss         | 6.39e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 35900    |\n","|    time_elapsed       | 2232     |\n","|    
total_timesteps    | 718000   |\n","| train/                |          |\n","|    entropy_loss       | -3.73    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35899    |\n","|    policy_loss        | -0.00193 |\n","|    std                | 0.617    |\n","|    value_loss         | 6.03e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36000    |\n","|    time_elapsed       | 2238     |\n","|    total_timesteps    | 720000   |\n","| train/                |          |\n","|    entropy_loss       | -3.74    |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 35999    |\n","|    policy_loss        | -0.00847 |\n","|    std                | 0.618    |\n","|    value_loss         | 1.53e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36100    |\n","|    time_elapsed       | 2245     |\n","|    total_timesteps    | 722000   |\n","| train/                |          |\n","|    entropy_loss       | -3.73    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36099    |\n","|    policy_loss        | 0.00217  |\n","|    std                | 0.618    |\n","|    value_loss         | 3.88e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       
|\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36200    |\n","|    time_elapsed       | 2251     |\n","|    total_timesteps    | 724000   |\n","| train/                |          |\n","|    entropy_loss       | -3.76    |\n","|    explained_variance | 0.999    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36199    |\n","|    policy_loss        | -0.0146  |\n","|    std                | 0.622    |\n","|    value_loss         | 2.64e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.1     |\n","|    ep_rew_mean        | -48.1    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36300    |\n","|    time_elapsed       | 2258     |\n","|    total_timesteps    | 726000   |\n","| train/                |          |\n","|    entropy_loss       | -3.75    |\n","|    explained_variance | 0.00718  |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36299    |\n","|    policy_loss        | 2.79     |\n","|    std                | 0.619    |\n","|    value_loss         | 7.64     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.6     |\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36400    |\n","|    time_elapsed       | 2264     |\n","|    total_timesteps    | 728000   |\n","| train/                |          |\n","|    entropy_loss       | -3.75    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36399    |\n","|    policy_loss        | -0.00105 |\n","|    std                | 
0.619    |\n","|    value_loss         | 5.98e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.1     |\n","|    ep_rew_mean        | -48.1    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36500    |\n","|    time_elapsed       | 2272     |\n","|    total_timesteps    | 730000   |\n","| train/                |          |\n","|    entropy_loss       | -3.75    |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36499    |\n","|    policy_loss        | 0.000307 |\n","|    std                | 0.62     |\n","|    value_loss         | 1.31e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.7     |\n","|    ep_rew_mean        | -46.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36600    |\n","|    time_elapsed       | 2278     |\n","|    total_timesteps    | 732000   |\n","| train/                |          |\n","|    entropy_loss       | -3.72    |\n","|    explained_variance | 0.0203   |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36599    |\n","|    policy_loss        | 1.17     |\n","|    std                | 0.614    |\n","|    value_loss         | 3.81     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.7     |\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36700    |\n","|    time_elapsed       | 2285     |\n","|    total_timesteps    | 734000   |\n","| train/                |          |\n","|    
entropy_loss       | -3.71    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36699    |\n","|    policy_loss        | 0.0048   |\n","|    std                | 0.613    |\n","|    value_loss         | 1.38e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.7     |\n","|    ep_rew_mean        | -47.6    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36800    |\n","|    time_elapsed       | 2291     |\n","|    total_timesteps    | 736000   |\n","| train/                |          |\n","|    entropy_loss       | -3.72    |\n","|    explained_variance | 0.951    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36799    |\n","|    policy_loss        | 0.00313  |\n","|    std                | 0.615    |\n","|    value_loss         | 7.27e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 36900    |\n","|    time_elapsed       | 2298     |\n","|    total_timesteps    | 738000   |\n","| train/                |          |\n","|    entropy_loss       | -3.71    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36899    |\n","|    policy_loss        | -0.0048  |\n","|    std                | 0.614    |\n","|    value_loss         | 6.38e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          
|\n","|    fps                | 321      |\n","|    iterations         | 37000    |\n","|    time_elapsed       | 2304     |\n","|    total_timesteps    | 740000   |\n","| train/                |          |\n","|    entropy_loss       | -3.72    |\n","|    explained_variance | 0.962    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 36999    |\n","|    policy_loss        | 0.0121   |\n","|    std                | 0.615    |\n","|    value_loss         | 2.19e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37100    |\n","|    time_elapsed       | 2312     |\n","|    total_timesteps    | 742000   |\n","| train/                |          |\n","|    entropy_loss       | -3.7     |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37099    |\n","|    policy_loss        | -0.00453 |\n","|    std                | 0.612    |\n","|    value_loss         | 4.09e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37200    |\n","|    time_elapsed       | 2318     |\n","|    total_timesteps    | 744000   |\n","| train/                |          |\n","|    entropy_loss       | -3.69    |\n","|    explained_variance | 0.933    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37199    |\n","|    policy_loss        | 0.0032   |\n","|    std                | 0.61     |\n","|    value_loss         | 1.22e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37300    |\n","|    time_elapsed       | 2326     |\n","|    total_timesteps    | 746000   |\n","| train/                |          |\n","|    entropy_loss       | -3.69    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37299    |\n","|    policy_loss        | -0.00679 |\n","|    std                | 0.61     |\n","|    value_loss         | 5.75e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.3     |\n","|    ep_rew_mean        | -48.3    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37400    |\n","|    time_elapsed       | 2332     |\n","|    total_timesteps    | 748000   |\n","| train/                |          |\n","|    entropy_loss       | -3.68    |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37399    |\n","|    policy_loss        | 0.00234  |\n","|    std                | 0.61     |\n","|    value_loss         | 6.43e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.1     |\n","|    ep_rew_mean        | -48.1    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37500    |\n","|    time_elapsed       | 2339     |\n","|    total_timesteps    | 750000   |\n","| train/                |          |\n","|    entropy_loss       | -3.7     |\n","|    explained_variance | 
0.976    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37499    |\n","|    policy_loss        | -0.00311 |\n","|    std                | 0.612    |\n","|    value_loss         | 2.36e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37600    |\n","|    time_elapsed       | 2345     |\n","|    total_timesteps    | 752000   |\n","| train/                |          |\n","|    entropy_loss       | -3.7     |\n","|    explained_variance | 0.925    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37599    |\n","|    policy_loss        | -0.0482  |\n","|    std                | 0.611    |\n","|    value_loss         | 0.00345  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.3     |\n","|    ep_rew_mean        | -47.2    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37700    |\n","|    time_elapsed       | 2353     |\n","|    total_timesteps    | 754000   |\n","| train/                |          |\n","|    entropy_loss       | -3.68    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37699    |\n","|    policy_loss        | -0.0146  |\n","|    std                | 0.608    |\n","|    value_loss         | 1.81e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.8     |\n","|    ep_rew_mean        | -46.8    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations  
       | 37800    |\n","|    time_elapsed       | 2359     |\n","|    total_timesteps    | 756000   |\n","| train/                |          |\n","|    entropy_loss       | -3.68    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37799    |\n","|    policy_loss        | -0.00412 |\n","|    std                | 0.608    |\n","|    value_loss         | 4.66e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47.1    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 37900    |\n","|    time_elapsed       | 2366     |\n","|    total_timesteps    | 758000   |\n","| train/                |          |\n","|    entropy_loss       | -3.66    |\n","|    explained_variance | -0.568   |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37899    |\n","|    policy_loss        | -0.00627 |\n","|    std                | 0.605    |\n","|    value_loss         | 8.32e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38000    |\n","|    time_elapsed       | 2371     |\n","|    total_timesteps    | 760000   |\n","| train/                |          |\n","|    entropy_loss       | -3.64    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 37999    |\n","|    policy_loss        | -0.00115 |\n","|    std                | 0.602    |\n","|    value_loss         | 2.03e-06 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38100    |\n","|    time_elapsed       | 2378     |\n","|    total_timesteps    | 762000   |\n","| train/                |          |\n","|    entropy_loss       | -3.62    |\n","|    explained_variance | 0.948    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38099    |\n","|    policy_loss        | -0.00437 |\n","|    std                | 0.599    |\n","|    value_loss         | 1.58e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38200    |\n","|    time_elapsed       | 2384     |\n","|    total_timesteps    | 764000   |\n","| train/                |          |\n","|    entropy_loss       | -3.61    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38199    |\n","|    policy_loss        | -0.00473 |\n","|    std                | 0.598    |\n","|    value_loss         | 6.11e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38300    |\n","|    time_elapsed       | 2391     |\n","|    total_timesteps    | 766000   |\n","| train/                |          |\n","|    entropy_loss       | -3.6     |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38299   
 |\n","|    policy_loss        | -0.00522 |\n","|    std                | 0.597    |\n","|    value_loss         | 5.88e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38400    |\n","|    time_elapsed       | 2397     |\n","|    total_timesteps    | 768000   |\n","| train/                |          |\n","|    entropy_loss       | -3.59    |\n","|    explained_variance | 0.622    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38399    |\n","|    policy_loss        | 0.00679  |\n","|    std                | 0.595    |\n","|    value_loss         | 1.34e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38500    |\n","|    time_elapsed       | 2404     |\n","|    total_timesteps    | 770000   |\n","| train/                |          |\n","|    entropy_loss       | -3.58    |\n","|    explained_variance | 0.724    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38499    |\n","|    policy_loss        | 0.0114   |\n","|    std                | 0.595    |\n","|    value_loss         | 1.94e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38600    |\n","|    time_elapsed       | 2410     |\n","|    total_timesteps    
| 772000   |\n","| train/                |          |\n","|    entropy_loss       | -3.57    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38599    |\n","|    policy_loss        | 0.00418  |\n","|    std                | 0.593    |\n","|    value_loss         | 2.25e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38700    |\n","|    time_elapsed       | 2417     |\n","|    total_timesteps    | 774000   |\n","| train/                |          |\n","|    entropy_loss       | -3.57    |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38699    |\n","|    policy_loss        | -0.0191  |\n","|    std                | 0.593    |\n","|    value_loss         | 5.32e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38800    |\n","|    time_elapsed       | 2422     |\n","|    total_timesteps    | 776000   |\n","| train/                |          |\n","|    entropy_loss       | -3.57    |\n","|    explained_variance | 0.936    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38799    |\n","|    policy_loss        | 0.00119  |\n","|    std                | 0.593    |\n","|    value_loss         | 2.66e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    
ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 38900    |\n","|    time_elapsed       | 2429     |\n","|    total_timesteps    | 778000   |\n","| train/                |          |\n","|    entropy_loss       | -3.54    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38899    |\n","|    policy_loss        | -0.00204 |\n","|    std                | 0.588    |\n","|    value_loss         | 1.54e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39000    |\n","|    time_elapsed       | 2435     |\n","|    total_timesteps    | 780000   |\n","| train/                |          |\n","|    entropy_loss       | -3.54    |\n","|    explained_variance | 0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 38999    |\n","|    policy_loss        | 0.00462  |\n","|    std                | 0.588    |\n","|    value_loss         | 7.1e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39100    |\n","|    time_elapsed       | 2441     |\n","|    total_timesteps    | 782000   |\n","| train/                |          |\n","|    entropy_loss       | -3.5     |\n","|    explained_variance | 0.98     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39099    |\n","|    policy_loss        | -0.00804 |\n","|    std                | 0.582    
|\n","|    value_loss         | 9.9e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39200    |\n","|    time_elapsed       | 2447     |\n","|    total_timesteps    | 784000   |\n","| train/                |          |\n","|    entropy_loss       | -3.5     |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39199    |\n","|    policy_loss        | -0.00284 |\n","|    std                | 0.583    |\n","|    value_loss         | 6.14e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39300    |\n","|    time_elapsed       | 2453     |\n","|    total_timesteps    | 786000   |\n","| train/                |          |\n","|    entropy_loss       | -3.5     |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39299    |\n","|    policy_loss        | 0.00224  |\n","|    std                | 0.583    |\n","|    value_loss         | 2.61e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49.5      |\n","|    ep_rew_mean        | -49.5     |\n","| time/                 |           |\n","|    fps                | 320       |\n","|    iterations         | 39400     |\n","|    time_elapsed       | 2460      |\n","|    total_timesteps    | 788000    |\n","| train/                |           |\n","|    
entropy_loss       | -3.49     |\n","|    explained_variance | 0.874     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 39399     |\n","|    policy_loss        | -0.000632 |\n","|    std                | 0.582     |\n","|    value_loss         | 6.33e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39500    |\n","|    time_elapsed       | 2465     |\n","|    total_timesteps    | 790000   |\n","| train/                |          |\n","|    entropy_loss       | -3.49    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39499    |\n","|    policy_loss        | -0.00634 |\n","|    std                | 0.58     |\n","|    value_loss         | 6.09e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39600    |\n","|    time_elapsed       | 2472     |\n","|    total_timesteps    | 792000   |\n","| train/                |          |\n","|    entropy_loss       | -3.49    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39599    |\n","|    policy_loss        | -0.00199 |\n","|    std                | 0.582    |\n","|    value_loss         | 1.18e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |    
      |\n","|    fps                | 320      |\n","|    iterations         | 39700    |\n","|    time_elapsed       | 2478     |\n","|    total_timesteps    | 794000   |\n","| train/                |          |\n","|    entropy_loss       | -3.47    |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39699    |\n","|    policy_loss        | -0.00372 |\n","|    std                | 0.578    |\n","|    value_loss         | 3.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39800    |\n","|    time_elapsed       | 2485     |\n","|    total_timesteps    | 796000   |\n","| train/                |          |\n","|    entropy_loss       | -3.47    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39799    |\n","|    policy_loss        | 0.00494  |\n","|    std                | 0.578    |\n","|    value_loss         | 5.94e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 39900    |\n","|    time_elapsed       | 2490     |\n","|    total_timesteps    | 798000   |\n","| train/                |          |\n","|    entropy_loss       | -3.47    |\n","|    explained_variance | 0.00847  |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39899    |\n","|    policy_loss        | 1.07     |\n","|    std                | 0.576    |\n","|    value_loss         | 3.82     
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40000    |\n","|    time_elapsed       | 2497     |\n","|    total_timesteps    | 800000   |\n","| train/                |          |\n","|    entropy_loss       | -3.46    |\n","|    explained_variance | 0.988    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 39999    |\n","|    policy_loss        | -0.0019  |\n","|    std                | 0.577    |\n","|    value_loss         | 3.22e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.5      |\n","|    ep_rew_mean        | -48.5     |\n","| time/                 |           |\n","|    fps                | 320       |\n","|    iterations         | 40100     |\n","|    time_elapsed       | 2502      |\n","|    total_timesteps    | 802000    |\n","| train/                |           |\n","|    entropy_loss       | -3.46     |\n","|    explained_variance | 0.996     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 40099     |\n","|    policy_loss        | -0.000233 |\n","|    std                | 0.576     |\n","|    value_loss         | 1.38e-06  |\n","-------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.5      |\n","|    ep_rew_mean        | -48.5     |\n","| time/                 |           |\n","|    fps                | 320       |\n","|    iterations         | 40200     |\n","|    time_elapsed       | 2509      |\n","|    total_timesteps    | 804000    |\n","| train/                |           |\n","|    entropy_loss       | -3.46     
|\n","|    explained_variance | 0.971     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 40199     |\n","|    policy_loss        | -0.000318 |\n","|    std                | 0.576     |\n","|    value_loss         | 2.48e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40300    |\n","|    time_elapsed       | 2515     |\n","|    total_timesteps    | 806000   |\n","| train/                |          |\n","|    entropy_loss       | -3.43    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40299    |\n","|    policy_loss        | 0.00128  |\n","|    std                | 0.572    |\n","|    value_loss         | 1.94e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40400    |\n","|    time_elapsed       | 2521     |\n","|    total_timesteps    | 808000   |\n","| train/                |          |\n","|    entropy_loss       | -3.43    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40399    |\n","|    policy_loss        | -0.00142 |\n","|    std                | 0.571    |\n","|    value_loss         | 2.06e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49        |\n","|    ep_rew_mean        | -49       |\n","| time/                 |           |\n","|    fps      
          | 320       |\n","|    iterations         | 40500     |\n","|    time_elapsed       | 2527      |\n","|    total_timesteps    | 810000    |\n","| train/                |           |\n","|    entropy_loss       | -3.42     |\n","|    explained_variance | 0.921     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 40499     |\n","|    policy_loss        | -0.000969 |\n","|    std                | 0.57      |\n","|    value_loss         | 1.83e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.8     |\n","|    ep_rew_mean        | -48.7    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40600    |\n","|    time_elapsed       | 2533     |\n","|    total_timesteps    | 812000   |\n","| train/                |          |\n","|    entropy_loss       | -3.41    |\n","|    explained_variance | 0.63     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40599    |\n","|    policy_loss        | -0.0366  |\n","|    std                | 0.57     |\n","|    value_loss         | 0.000372 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.3     |\n","|    ep_rew_mean        | -49.2    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40700    |\n","|    time_elapsed       | 2539     |\n","|    total_timesteps    | 814000   |\n","| train/                |          |\n","|    entropy_loss       | -3.4     |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40699    |\n","|    policy_loss        | 0.00154  |\n","|    std                | 0.568    |\n","|    value_loss         | 2.52e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.8     |\n","|    ep_rew_mean        | -47.7    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40800    |\n","|    time_elapsed       | 2545     |\n","|    total_timesteps    | 816000   |\n","| train/                |          |\n","|    entropy_loss       | -3.39    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40799    |\n","|    policy_loss        | -0.00695 |\n","|    std                | 0.567    |\n","|    value_loss         | 8.29e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 40900    |\n","|    time_elapsed       | 2551     |\n","|    total_timesteps    | 818000   |\n","| train/                |          |\n","|    entropy_loss       | -3.39    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40899    |\n","|    policy_loss        | 0.00276  |\n","|    std                | 0.567    |\n","|    value_loss         | 1.95e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41000    |\n","|    time_elapsed       | 2557     |\n","|    total_timesteps    | 820000   |\n","| train/                |          |\n","|    entropy_loss       | -3.39    |\n","|    explained_variance | 
0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 40999    |\n","|    policy_loss        | 0.00181  |\n","|    std                | 0.566    |\n","|    value_loss         | 2.44e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41100    |\n","|    time_elapsed       | 2564     |\n","|    total_timesteps    | 822000   |\n","| train/                |          |\n","|    entropy_loss       | -3.37    |\n","|    explained_variance | 0.926    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41099    |\n","|    policy_loss        | -0.00897 |\n","|    std                | 0.564    |\n","|    value_loss         | 1.29e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41200    |\n","|    time_elapsed       | 2569     |\n","|    total_timesteps    | 824000   |\n","| train/                |          |\n","|    entropy_loss       | -3.39    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41199    |\n","|    policy_loss        | 0.00347  |\n","|    std                | 0.566    |\n","|    value_loss         | 1.27e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations  
       | 41300    |\n","|    time_elapsed       | 2576     |\n","|    total_timesteps    | 826000   |\n","| train/                |          |\n","|    entropy_loss       | -3.37    |\n","|    explained_variance | 0.931    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41299    |\n","|    policy_loss        | -0.00187 |\n","|    std                | 0.563    |\n","|    value_loss         | 1.85e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41400    |\n","|    time_elapsed       | 2581     |\n","|    total_timesteps    | 828000   |\n","| train/                |          |\n","|    entropy_loss       | -3.35    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41399    |\n","|    policy_loss        | 0.00446  |\n","|    std                | 0.561    |\n","|    value_loss         | 7.59e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41500    |\n","|    time_elapsed       | 2588     |\n","|    total_timesteps    | 830000   |\n","| train/                |          |\n","|    entropy_loss       | -3.35    |\n","|    explained_variance | 0.934    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41499    |\n","|    policy_loss        | 0.000369 |\n","|    std                | 0.561    |\n","|    value_loss         | 2.81e-06 |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41600    |\n","|    time_elapsed       | 2594     |\n","|    total_timesteps    | 832000   |\n","| train/                |          |\n","|    entropy_loss       | -3.33    |\n","|    explained_variance | 0.742    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41599    |\n","|    policy_loss        | 0.00589  |\n","|    std                | 0.558    |\n","|    value_loss         | 3.23e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41700    |\n","|    time_elapsed       | 2600     |\n","|    total_timesteps    | 834000   |\n","| train/                |          |\n","|    entropy_loss       | -3.32    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41699    |\n","|    policy_loss        | -0.0037  |\n","|    std                | 0.556    |\n","|    value_loss         | 4.79e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41800    |\n","|    time_elapsed       | 2606     |\n","|    total_timesteps    | 836000   |\n","| train/                |          |\n","|    entropy_loss       | -3.32    |\n","|    explained_variance | 0.966    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41799   
 |\n","|    policy_loss        | -0.00179 |\n","|    std                | 0.557    |\n","|    value_loss         | 1.14e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 41900    |\n","|    time_elapsed       | 2611     |\n","|    total_timesteps    | 838000   |\n","| train/                |          |\n","|    entropy_loss       | -3.3     |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41899    |\n","|    policy_loss        | 0.0025   |\n","|    std                | 0.553    |\n","|    value_loss         | 1.42e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 42000    |\n","|    time_elapsed       | 2618     |\n","|    total_timesteps    | 840000   |\n","| train/                |          |\n","|    entropy_loss       | -3.28    |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 41999    |\n","|    policy_loss        | -0.00296 |\n","|    std                | 0.55     |\n","|    value_loss         | 6.84e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.6     |\n","|    ep_rew_mean        | -46.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 42100    |\n","|    time_elapsed       | 2623     |\n","|    total_timesteps    
| 842000   |\n","| train/                |          |\n","|    entropy_loss       | -3.27    |\n","|    explained_variance | 0.774    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42099    |\n","|    policy_loss        | 0.00571  |\n","|    std                | 0.549    |\n","|    value_loss         | 1.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.6     |\n","|    ep_rew_mean        | -46.5    |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 42200    |\n","|    time_elapsed       | 2630     |\n","|    total_timesteps    | 844000   |\n","| train/                |          |\n","|    entropy_loss       | -3.24    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42199    |\n","|    policy_loss        | -0.00449 |\n","|    std                | 0.546    |\n","|    value_loss         | 1.13e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 42300    |\n","|    time_elapsed       | 2635     |\n","|    total_timesteps    | 846000   |\n","| train/                |          |\n","|    entropy_loss       | -3.24    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42299    |\n","|    policy_loss        | -0.00185 |\n","|    std                | 0.546    |\n","|    value_loss         | 1.06e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    
ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 320      |\n","|    iterations         | 42400    |\n","|    time_elapsed       | 2642     |\n","|    total_timesteps    | 848000   |\n","| train/                |          |\n","|    entropy_loss       | -3.25    |\n","|    explained_variance | 0.836    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42399    |\n","|    policy_loss        | 0.00408  |\n","|    std                | 0.547    |\n","|    value_loss         | 2.96e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.3     |\n","|    ep_rew_mean        | -48.3    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 42500    |\n","|    time_elapsed       | 2647     |\n","|    total_timesteps    | 850000   |\n","| train/                |          |\n","|    entropy_loss       | -3.21    |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42499    |\n","|    policy_loss        | -0.00118 |\n","|    std                | 0.542    |\n","|    value_loss         | 2.75e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.3      |\n","|    ep_rew_mean        | -48.3     |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 42600     |\n","|    time_elapsed       | 2653      |\n","|    total_timesteps    | 852000    |\n","| train/                |           |\n","|    entropy_loss       | -3.21     |\n","|    explained_variance | 0.986     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 42599     |\n","|    policy_loss        | -3.49e-06 |\n","|    std              
  | 0.542     |\n","|    value_loss         | 1.92e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 42700    |\n","|    time_elapsed       | 2659     |\n","|    total_timesteps    | 854000   |\n","| train/                |          |\n","|    entropy_loss       | -3.2     |\n","|    explained_variance | 0.969    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42699    |\n","|    policy_loss        | -0.00442 |\n","|    std                | 0.54     |\n","|    value_loss         | 1.19e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 42800    |\n","|    time_elapsed       | 2664     |\n","|    total_timesteps    | 856000   |\n","| train/                |          |\n","|    entropy_loss       | -3.18    |\n","|    explained_variance | 0.998    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42799    |\n","|    policy_loss        | -0.00345 |\n","|    std                | 0.538    |\n","|    value_loss         | 1.99e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 42900    |\n","|    time_elapsed       | 2671     |\n","|    total_timesteps    | 858000   |\n","| train/                |          |\n","|    
entropy_loss       | -3.18    |\n","|    explained_variance | 0.88     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42899    |\n","|    policy_loss        | 0.00648  |\n","|    std                | 0.538    |\n","|    value_loss         | 6.45e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43000    |\n","|    time_elapsed       | 2676     |\n","|    total_timesteps    | 860000   |\n","| train/                |          |\n","|    entropy_loss       | -3.19    |\n","|    explained_variance | 0.992    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 42999    |\n","|    policy_loss        | 0.00238  |\n","|    std                | 0.538    |\n","|    value_loss         | 1.14e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43100    |\n","|    time_elapsed       | 2683     |\n","|    total_timesteps    | 862000   |\n","| train/                |          |\n","|    entropy_loss       | -3.19    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43099    |\n","|    policy_loss        | -0.00088 |\n","|    std                | 0.538    |\n","|    value_loss         | 2.91e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49.5      |\n","|    ep_rew_mean        | -49.5     |\n","| time/                 |        
   |\n","|    fps                | 321       |\n","|    iterations         | 43200     |\n","|    time_elapsed       | 2688      |\n","|    total_timesteps    | 864000    |\n","| train/                |           |\n","|    entropy_loss       | -3.18     |\n","|    explained_variance | 0.992     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 43199     |\n","|    policy_loss        | -0.000387 |\n","|    std                | 0.537     |\n","|    value_loss         | 3.48e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43300    |\n","|    time_elapsed       | 2695     |\n","|    total_timesteps    | 866000   |\n","| train/                |          |\n","|    entropy_loss       | -3.16    |\n","|    explained_variance | 0.977    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43299    |\n","|    policy_loss        | -0.00078 |\n","|    std                | 0.535    |\n","|    value_loss         | 9.21e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43400    |\n","|    time_elapsed       | 2700     |\n","|    total_timesteps    | 868000   |\n","| train/                |          |\n","|    entropy_loss       | -3.15    |\n","|    explained_variance | 0.983    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43399    |\n","|    policy_loss        | -0.00333 |\n","|    std                | 0.533    |\n","|    value_loss         | 5.55e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43500    |\n","|    time_elapsed       | 2706     |\n","|    total_timesteps    | 870000   |\n","| train/                |          |\n","|    entropy_loss       | -3.14    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43499    |\n","|    policy_loss        | 0.00306  |\n","|    std                | 0.531    |\n","|    value_loss         | 2.46e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43600    |\n","|    time_elapsed       | 2712     |\n","|    total_timesteps    | 872000   |\n","| train/                |          |\n","|    entropy_loss       | -3.14    |\n","|    explained_variance | 0.924    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43599    |\n","|    policy_loss        | -0.00204 |\n","|    std                | 0.531    |\n","|    value_loss         | 1.71e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43700    |\n","|    time_elapsed       | 2718     |\n","|    total_timesteps    | 874000   |\n","| train/                |          |\n","|    entropy_loss       | -3.17    |\n","|    explained_variance | 
0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43699    |\n","|    policy_loss        | -0.00224 |\n","|    std                | 0.535    |\n","|    value_loss         | 1.89e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43800    |\n","|    time_elapsed       | 2725     |\n","|    total_timesteps    | 876000   |\n","| train/                |          |\n","|    entropy_loss       | -3.15    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43799    |\n","|    policy_loss        | 0.00208  |\n","|    std                | 0.533    |\n","|    value_loss         | 2.37e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 43900    |\n","|    time_elapsed       | 2731     |\n","|    total_timesteps    | 878000   |\n","| train/                |          |\n","|    entropy_loss       | -3.16    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43899    |\n","|    policy_loss        | -0.00073 |\n","|    std                | 0.534    |\n","|    value_loss         | 2.8e-07  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations  
       | 44000    |\n","|    time_elapsed       | 2737     |\n","|    total_timesteps    | 880000   |\n","| train/                |          |\n","|    entropy_loss       | -3.15    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 43999    |\n","|    policy_loss        | 0.00276  |\n","|    std                | 0.533    |\n","|    value_loss         | 1.14e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44100    |\n","|    time_elapsed       | 2743     |\n","|    total_timesteps    | 882000   |\n","| train/                |          |\n","|    entropy_loss       | -3.16    |\n","|    explained_variance | 0.94     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44099    |\n","|    policy_loss        | -0.00433 |\n","|    std                | 0.534    |\n","|    value_loss         | 1.01e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44200    |\n","|    time_elapsed       | 2749     |\n","|    total_timesteps    | 884000   |\n","| train/                |          |\n","|    entropy_loss       | -3.16    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44199    |\n","|    policy_loss        | 0.00238  |\n","|    std                | 0.534    |\n","|    value_loss         | 2.03e-06 |\n","------------------------------------\n","-------------------------------------\n","| 
rollout/              |           |\n","|    ep_len_mean        | 49.5      |\n","|    ep_rew_mean        | -49.5     |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 44300     |\n","|    time_elapsed       | 2754      |\n","|    total_timesteps    | 886000    |\n","| train/                |           |\n","|    entropy_loss       | -3.17     |\n","|    explained_variance | 0.989     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 44299     |\n","|    policy_loss        | -0.000595 |\n","|    std                | 0.535     |\n","|    value_loss         | 9.41e-07  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44400    |\n","|    time_elapsed       | 2761     |\n","|    total_timesteps    | 888000   |\n","| train/                |          |\n","|    entropy_loss       | -3.17    |\n","|    explained_variance | 0.687    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44399    |\n","|    policy_loss        | 0.00357  |\n","|    std                | 0.535    |\n","|    value_loss         | 7.24e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.7     |\n","|    ep_rew_mean        | -48.7    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44500    |\n","|    time_elapsed       | 2766     |\n","|    total_timesteps    | 890000   |\n","| train/                |          |\n","|    entropy_loss       | -3.15    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates   
       | 44499    |\n","|    policy_loss        | -0.00561 |\n","|    std                | 0.532    |\n","|    value_loss         | 5.05e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.2     |\n","|    ep_rew_mean        | -49.2    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44600    |\n","|    time_elapsed       | 2773     |\n","|    total_timesteps    | 892000   |\n","| train/                |          |\n","|    entropy_loss       | -3.13    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44599    |\n","|    policy_loss        | -0.00261 |\n","|    std                | 0.53     |\n","|    value_loss         | 5.65e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.2     |\n","|    ep_rew_mean        | -49.2    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44700    |\n","|    time_elapsed       | 2779     |\n","|    total_timesteps    | 894000   |\n","| train/                |          |\n","|    entropy_loss       | -3.13    |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44699    |\n","|    policy_loss        | -0.00164 |\n","|    std                | 0.53     |\n","|    value_loss         | 8.87e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44800    |\n","|    time_elapsed       | 2784     |\n","|    
total_timesteps    | 896000   |\n","| train/                |          |\n","|    entropy_loss       | -3.13    |\n","|    explained_variance | 0.97     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44799    |\n","|    policy_loss        | -0.00109 |\n","|    std                | 0.53     |\n","|    value_loss         | 3.15e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 44900    |\n","|    time_elapsed       | 2790     |\n","|    total_timesteps    | 898000   |\n","| train/                |          |\n","|    entropy_loss       | -3.13    |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44899    |\n","|    policy_loss        | -0.00629 |\n","|    std                | 0.53     |\n","|    value_loss         | 7.41e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45000    |\n","|    time_elapsed       | 2796     |\n","|    total_timesteps    | 900000   |\n","| train/                |          |\n","|    entropy_loss       | -3.15    |\n","|    explained_variance | 0.965    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 44999    |\n","|    policy_loss        | 0.00125  |\n","|    std                | 0.532    |\n","|    value_loss         | 4.25e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     
|\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45100    |\n","|    time_elapsed       | 2802     |\n","|    total_timesteps    | 902000   |\n","| train/                |          |\n","|    entropy_loss       | -3.14    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45099    |\n","|    policy_loss        | 0.000925 |\n","|    std                | 0.531    |\n","|    value_loss         | 1.07e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45200    |\n","|    time_elapsed       | 2808     |\n","|    total_timesteps    | 904000   |\n","| train/                |          |\n","|    entropy_loss       | -3.14    |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45199    |\n","|    policy_loss        | 0.00112  |\n","|    std                | 0.531    |\n","|    value_loss         | 1.55e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45300    |\n","|    time_elapsed       | 2815     |\n","|    total_timesteps    | 906000   |\n","| train/                |          |\n","|    entropy_loss       | -3.13    |\n","|    explained_variance | 0.991    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45299    |\n","|    policy_loss        | -0.00876 |\n","|    std                | 
0.53     |\n","|    value_loss         | 2.02e-05 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48        |\n","|    ep_rew_mean        | -48       |\n","| time/                 |           |\n","|    fps                | 321       |\n","|    iterations         | 45400     |\n","|    time_elapsed       | 2820      |\n","|    total_timesteps    | 908000    |\n","| train/                |           |\n","|    entropy_loss       | -3.11     |\n","|    explained_variance | 0.991     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 45399     |\n","|    policy_loss        | -0.000813 |\n","|    std                | 0.527     |\n","|    value_loss         | 1.07e-05  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45500    |\n","|    time_elapsed       | 2827     |\n","|    total_timesteps    | 910000   |\n","| train/                |          |\n","|    entropy_loss       | -3.11    |\n","|    explained_variance | 0.00978  |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45499    |\n","|    policy_loss        | 2.07     |\n","|    std                | 0.527    |\n","|    value_loss         | 3.83     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45600    |\n","|    time_elapsed       | 2832     |\n","|    total_timesteps    | 912000   |\n","| train/                |          
|\n","|    entropy_loss       | -3.11    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45599    |\n","|    policy_loss        | 0.000105 |\n","|    std                | 0.527    |\n","|    value_loss         | 1.18e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 321      |\n","|    iterations         | 45700    |\n","|    time_elapsed       | 2839     |\n","|    total_timesteps    | 914000   |\n","| train/                |          |\n","|    entropy_loss       | -3.1     |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45699    |\n","|    policy_loss        | 0.00209  |\n","|    std                | 0.526    |\n","|    value_loss         | 5.57e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 45800    |\n","|    time_elapsed       | 2844     |\n","|    total_timesteps    | 916000   |\n","| train/                |          |\n","|    entropy_loss       | -3.1     |\n","|    explained_variance | 0.996    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45799    |\n","|    policy_loss        | 0.00412  |\n","|    std                | 0.526    |\n","|    value_loss         | 5.44e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 | 
         |\n","|    fps                | 322      |\n","|    iterations         | 45900    |\n","|    time_elapsed       | 2850     |\n","|    total_timesteps    | 918000   |\n","| train/                |          |\n","|    entropy_loss       | -3.1     |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45899    |\n","|    policy_loss        | -0.0068  |\n","|    std                | 0.526    |\n","|    value_loss         | 7.49e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46000    |\n","|    time_elapsed       | 2856     |\n","|    total_timesteps    | 920000   |\n","| train/                |          |\n","|    entropy_loss       | -3.08    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 45999    |\n","|    policy_loss        | 0.00267  |\n","|    std                | 0.523    |\n","|    value_loss         | 3.03e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46100    |\n","|    time_elapsed       | 2862     |\n","|    total_timesteps    | 922000   |\n","| train/                |          |\n","|    entropy_loss       | -3.08    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46099    |\n","|    policy_loss        | -0.00402 |\n","|    std                | 0.523    |\n","|    value_loss         | 4.65e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46200    |\n","|    time_elapsed       | 2868     |\n","|    total_timesteps    | 924000   |\n","| train/                |          |\n","|    entropy_loss       | -3.08    |\n","|    explained_variance | 0.895    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46199    |\n","|    policy_loss        | -0.00312 |\n","|    std                | 0.523    |\n","|    value_loss         | 4.4e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46300    |\n","|    time_elapsed       | 2874     |\n","|    total_timesteps    | 926000   |\n","| train/                |          |\n","|    entropy_loss       | -3.09    |\n","|    explained_variance | 0.925    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46299    |\n","|    policy_loss        | 0.000623 |\n","|    std                | 0.524    |\n","|    value_loss         | 4.35e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46400    |\n","|    time_elapsed       | 2880     |\n","|    total_timesteps    | 928000   |\n","| train/                |          |\n","|    entropy_loss       | -3.09    |\n","|    explained_variance | 
0.989    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46399    |\n","|    policy_loss        | -0.00431 |\n","|    std                | 0.525    |\n","|    value_loss         | 3.01e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 49        |\n","|    ep_rew_mean        | -49       |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 46500     |\n","|    time_elapsed       | 2886      |\n","|    total_timesteps    | 930000    |\n","| train/                |           |\n","|    entropy_loss       | -3.06     |\n","|    explained_variance | 0.967     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 46499     |\n","|    policy_loss        | -0.000254 |\n","|    std                | 0.521     |\n","|    value_loss         | 3.46e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46600    |\n","|    time_elapsed       | 2892     |\n","|    total_timesteps    | 932000   |\n","| train/                |          |\n","|    entropy_loss       | -3.06    |\n","|    explained_variance | 0.922    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46599    |\n","|    policy_loss        | 0.000117 |\n","|    std                | 0.52     |\n","|    value_loss         | 1.52e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      
|\n","|    iterations         | 46700    |\n","|    time_elapsed       | 2898     |\n","|    total_timesteps    | 934000   |\n","| train/                |          |\n","|    entropy_loss       | -3.06    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46699    |\n","|    policy_loss        | 0.00613  |\n","|    std                | 0.52     |\n","|    value_loss         | 7.55e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.2     |\n","|    ep_rew_mean        | -48.1    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46800    |\n","|    time_elapsed       | 2904     |\n","|    total_timesteps    | 936000   |\n","| train/                |          |\n","|    entropy_loss       | -3.06    |\n","|    explained_variance | 0.982    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46799    |\n","|    policy_loss        | 0.00283  |\n","|    std                | 0.52     |\n","|    value_loss         | 1.32e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.7     |\n","|    ep_rew_mean        | -48.6    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 46900    |\n","|    time_elapsed       | 2910     |\n","|    total_timesteps    | 938000   |\n","| train/                |          |\n","|    entropy_loss       | -3.06    |\n","|    explained_variance | 0.974    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46899    |\n","|    policy_loss        | -0.00109 |\n","|    std                | 0.52     |\n","|    value_loss         | 3.66e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47000    |\n","|    time_elapsed       | 2915     |\n","|    total_timesteps    | 940000   |\n","| train/                |          |\n","|    entropy_loss       | -3.04    |\n","|    explained_variance | 0.94     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 46999    |\n","|    policy_loss        | 0.000478 |\n","|    std                | 0.518    |\n","|    value_loss         | 4.63e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47100    |\n","|    time_elapsed       | 2922     |\n","|    total_timesteps    | 942000   |\n","| train/                |          |\n","|    entropy_loss       | -3.03    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47099    |\n","|    policy_loss        | 0.00378  |\n","|    std                | 0.517    |\n","|    value_loss         | 3.4e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47200    |\n","|    time_elapsed       | 2927     |\n","|    total_timesteps    | 944000   |\n","| train/                |          |\n","|    entropy_loss       | -3.03    |\n","|    explained_variance | 
0.959    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47199    |\n","|    policy_loss        | -0.00336 |\n","|    std                | 0.516    |\n","|    value_loss         | 3.5e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47300    |\n","|    time_elapsed       | 2934     |\n","|    total_timesteps    | 946000   |\n","| train/                |          |\n","|    entropy_loss       | -3.03    |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47299    |\n","|    policy_loss        | 0.000361 |\n","|    std                | 0.517    |\n","|    value_loss         | 2.69e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47400    |\n","|    time_elapsed       | 2940     |\n","|    total_timesteps    | 948000   |\n","| train/                |          |\n","|    entropy_loss       | -3.01    |\n","|    explained_variance | 0.984    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47399    |\n","|    policy_loss        | -0.00264 |\n","|    std                | 0.514    |\n","|    value_loss         | 4.71e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.5     |\n","|    ep_rew_mean        | -47.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations  
       | 47500    |\n","|    time_elapsed       | 2947     |\n","|    total_timesteps    | 950000   |\n","| train/                |          |\n","|    entropy_loss       | -3.02    |\n","|    explained_variance | 0.938    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47499    |\n","|    policy_loss        | -0.00817 |\n","|    std                | 0.515    |\n","|    value_loss         | 3.61e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47600    |\n","|    time_elapsed       | 2952     |\n","|    total_timesteps    | 952000   |\n","| train/                |          |\n","|    entropy_loss       | -3.03    |\n","|    explained_variance | 0.986    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47599    |\n","|    policy_loss        | 0.00562  |\n","|    std                | 0.517    |\n","|    value_loss         | 7.08e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47700    |\n","|    time_elapsed       | 2959     |\n","|    total_timesteps    | 954000   |\n","| train/                |          |\n","|    entropy_loss       | -3       |\n","|    explained_variance | 0.971    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47699    |\n","|    policy_loss        | 0.00259  |\n","|    std                | 0.513    |\n","|    value_loss         | 1.7e-05  |\n","------------------------------------\n","------------------------------------\n","| 
rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47800    |\n","|    time_elapsed       | 2965     |\n","|    total_timesteps    | 956000   |\n","| train/                |          |\n","|    entropy_loss       | -3.02    |\n","|    explained_variance | 0.949    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47799    |\n","|    policy_loss        | 0.00306  |\n","|    std                | 0.515    |\n","|    value_loss         | 7.18e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 47900    |\n","|    time_elapsed       | 2972     |\n","|    total_timesteps    | 958000   |\n","| train/                |          |\n","|    entropy_loss       | -3.01    |\n","|    explained_variance | 0.0269   |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47899    |\n","|    policy_loss        | 0.596    |\n","|    std                | 0.512    |\n","|    value_loss         | 3.78     |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48000    |\n","|    time_elapsed       | 2977     |\n","|    total_timesteps    | 960000   |\n","| train/                |          |\n","|    entropy_loss       | -2.97    |\n","|    explained_variance | 0.979    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 47999   
 |\n","|    policy_loss        | -0.00173 |\n","|    std                | 0.509    |\n","|    value_loss         | 4.9e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 47.1     |\n","|    ep_rew_mean        | -47      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48100    |\n","|    time_elapsed       | 2984     |\n","|    total_timesteps    | 962000   |\n","| train/                |          |\n","|    entropy_loss       | -2.97    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48099    |\n","|    policy_loss        | 0.0103   |\n","|    std                | 0.509    |\n","|    value_loss         | 2.65e-05 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.5      |\n","|    ep_rew_mean        | -48.5     |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 48200     |\n","|    time_elapsed       | 2990      |\n","|    total_timesteps    | 964000    |\n","| train/                |           |\n","|    entropy_loss       | -2.97     |\n","|    explained_variance | 0.998     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 48199     |\n","|    policy_loss        | -1.97e-05 |\n","|    std                | 0.509     |\n","|    value_loss         | 4.11e-07  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48300    |\n","|    time_elapsed       | 2996     |\n","|    
total_timesteps    | 966000   |\n","| train/                |          |\n","|    entropy_loss       | -2.96    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48299    |\n","|    policy_loss        | 0.000427 |\n","|    std                | 0.507    |\n","|    value_loss         | 5.28e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48400    |\n","|    time_elapsed       | 3002     |\n","|    total_timesteps    | 968000   |\n","| train/                |          |\n","|    entropy_loss       | -2.96    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48399    |\n","|    policy_loss        | 0.000246 |\n","|    std                | 0.507    |\n","|    value_loss         | 2.52e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48500    |\n","|    time_elapsed       | 3008     |\n","|    total_timesteps    | 970000   |\n","| train/                |          |\n","|    entropy_loss       | -2.94    |\n","|    explained_variance | 0.972    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48499    |\n","|    policy_loss        | -0.00162 |\n","|    std                | 0.505    |\n","|    value_loss         | 1.27e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       
|\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48600    |\n","|    time_elapsed       | 3015     |\n","|    total_timesteps    | 972000   |\n","| train/                |          |\n","|    entropy_loss       | -2.93    |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48599    |\n","|    policy_loss        | -0.00136 |\n","|    std                | 0.504    |\n","|    value_loss         | 1.92e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48.5      |\n","|    ep_rew_mean        | -48.5     |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 48700     |\n","|    time_elapsed       | 3020      |\n","|    total_timesteps    | 974000    |\n","| train/                |           |\n","|    entropy_loss       | -2.95     |\n","|    explained_variance | 0.941     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 48699     |\n","|    policy_loss        | -0.000999 |\n","|    std                | 0.506     |\n","|    value_loss         | 4.74e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48800    |\n","|    time_elapsed       | 3027     |\n","|    total_timesteps    | 976000   |\n","| train/                |          |\n","|    entropy_loss       | -2.93    |\n","|    explained_variance | 0.993    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48799    |\n","|    policy_loss        | 0.00292  |\n","|    
std                | 0.504    |\n","|    value_loss         | 3.46e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 48900    |\n","|    time_elapsed       | 3033     |\n","|    total_timesteps    | 978000   |\n","| train/                |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 48899    |\n","|    policy_loss        | -0.00141 |\n","|    std                | 0.5      |\n","|    value_loss         | 4.95e-06 |\n","------------------------------------\n","-------------------------------------\n","| rollout/              |           |\n","|    ep_len_mean        | 48        |\n","|    ep_rew_mean        | -48       |\n","| time/                 |           |\n","|    fps                | 322       |\n","|    iterations         | 49000     |\n","|    time_elapsed       | 3040      |\n","|    total_timesteps    | 980000    |\n","| train/                |           |\n","|    entropy_loss       | -2.9      |\n","|    explained_variance | 0.995     |\n","|    learning_rate      | 0.0007    |\n","|    n_updates          | 48999     |\n","|    policy_loss        | -0.000775 |\n","|    std                | 0.5       |\n","|    value_loss         | 2.01e-06  |\n","-------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49100    |\n","|    time_elapsed       | 3045     |\n","|    total_timesteps    | 982000   |\n","| train/           
     |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.975    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49099    |\n","|    policy_loss        | 0.00141  |\n","|    std                | 0.5      |\n","|    value_loss         | 7.38e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 50       |\n","|    ep_rew_mean        | -50      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49200    |\n","|    time_elapsed       | 3052     |\n","|    total_timesteps    | 984000   |\n","| train/                |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49199    |\n","|    policy_loss        | 3.5e-05  |\n","|    std                | 0.5      |\n","|    value_loss         | 4.75e-07 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49.5     |\n","|    ep_rew_mean        | -49.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49300    |\n","|    time_elapsed       | 3057     |\n","|    total_timesteps    | 986000   |\n","| train/                |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.99     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49299    |\n","|    policy_loss        | -0.00161 |\n","|    std                | 0.5      |\n","|    value_loss         | 5.11e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/   
              |          |\n","|    fps                | 322      |\n","|    iterations         | 49400    |\n","|    time_elapsed       | 3064     |\n","|    total_timesteps    | 988000   |\n","| train/                |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.997    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49399    |\n","|    policy_loss        | -0.00053 |\n","|    std                | 0.5      |\n","|    value_loss         | 4.06e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49500    |\n","|    time_elapsed       | 3069     |\n","|    total_timesteps    | 990000   |\n","| train/                |          |\n","|    entropy_loss       | -2.91    |\n","|    explained_variance | 0.88     |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49499    |\n","|    policy_loss        | -0.0311  |\n","|    std                | 0.501    |\n","|    value_loss         | 0.000115 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49600    |\n","|    time_elapsed       | 3076     |\n","|    total_timesteps    | 992000   |\n","| train/                |          |\n","|    entropy_loss       | -2.9     |\n","|    explained_variance | 0.995    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49599    |\n","|    policy_loss        | 0.000234 |\n","|    std                | 0.5      |\n","|    value_loss         | 1.49e-06 
|\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48       |\n","|    ep_rew_mean        | -48      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49700    |\n","|    time_elapsed       | 3082     |\n","|    total_timesteps    | 994000   |\n","| train/                |          |\n","|    entropy_loss       | -2.92    |\n","|    explained_variance | 0.985    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49699    |\n","|    policy_loss        | 0.001    |\n","|    std                | 0.502    |\n","|    value_loss         | 1.4e-06  |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 48.5     |\n","|    ep_rew_mean        | -48.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49800    |\n","|    time_elapsed       | 3087     |\n","|    total_timesteps    | 996000   |\n","| train/                |          |\n","|    entropy_loss       | -2.89    |\n","|    explained_variance | 0.987    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49799    |\n","|    policy_loss        | 0.000412 |\n","|    std                | 0.499    |\n","|    value_loss         | 1.65e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 49       |\n","|    ep_rew_mean        | -49      |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 49900    |\n","|    time_elapsed       | 3094     |\n","|    total_timesteps    | 998000   |\n","| train/                |          |\n","|    entropy_loss       | -2.89    |\n","|    explained_variance | 
0.994    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49899    |\n","|    policy_loss        | -0.00102 |\n","|    std                | 0.499    |\n","|    value_loss         | 1.04e-06 |\n","------------------------------------\n","------------------------------------\n","| rollout/              |          |\n","|    ep_len_mean        | 46.6     |\n","|    ep_rew_mean        | -46.5    |\n","| time/                 |          |\n","|    fps                | 322      |\n","|    iterations         | 50000    |\n","|    time_elapsed       | 3099     |\n","|    total_timesteps    | 1000000  |\n","| train/                |          |\n","|    entropy_loss       | -2.84    |\n","|    explained_variance | 0.942    |\n","|    learning_rate      | 0.0007   |\n","|    n_updates          | 49999    |\n","|    policy_loss        | 2.1e-05  |\n","|    std                | 0.492    |\n","|    value_loss         | 1.95e-05 |\n","------------------------------------\n"]},{"output_type":"execute_result","data":{"text/plain":["<stable_baselines3.a2c.a2c.A2C at 0x7fde13cf0a00>"]},"metadata":{},"execution_count":16}],"source":["model.learn(1000000)"]},{"cell_type":"code","execution_count":17,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1697805826543,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"Q4TX9ABssFgF"},"outputs":[],"source":["model.save(\"a2c-PandaPickAndPlace-v3\")\n","env.save(\"vec_normalize.pkl\")"]},{"cell_type":"code","execution_count":18,"metadata":{"id":"LyerZBvys110","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697805828368,"user_tz":-60,"elapsed":1827,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"9f71bc5b-e9e4-4b2f-910e-d7a55c254fad"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: 
Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n","  warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["Mean reward = -45.00 +/- 15.00\n"]}],"source":["eval_env = DummyVecEnv([lambda: gym.make(\"PandaPickAndPlace-v3\")])\n","eval_env = VecNormalize.load(\"vec_normalize.pkl\", eval_env)\n","\n","eval_env.render_mode = \"rgb_array\"\n","eval_env.training = False\n","eval_env.norm_reward = False\n","\n","model = A2C.load(\"a2c-PandaPickAndPlace-v3\")\n","\n","mean_reward, std_reward = evaluate_policy(model, eval_env)\n","\n","print(f\"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}\")"]},{"cell_type":"code","source":["package_to_hub(\n","    model=model,\n","    model_name=f\"a2c-{env_id}\",\n","    model_architecture=\"A2C\",\n","    env_id=env_id,\n","    eval_env=eval_env,\n","    repo_id=f\"jake-walker/a2c-{env_id}\", # TODO: Change the username\n","    commit_message=\"Initial 
commit\",\n",")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":551,"referenced_widgets":["8bfe2085c7204088a1ef714838c2aadd","4658324a43be490caf922ead1f9d2d1d","4d717050a24841ee8069c6653117b3dc","ce714cf5cac0486fad1eb8cc49d53e0b","c7fe3d6d2f5541a1a0ec140a7741995d","4da35dc2fa704607b4e2504b6af5db88","9bfd6667c1fb449392d491dae77b59ee","7d07efce45fa4670ad96f9fac0dab684","5ef098a570de4d6e87bf8df85a33050e","f454eaeac4ac494c96fc9f7d438e9618","fc7ee0566b384ab9a135b89739531326","896b4650b155414aaba9036d6ebce7b2","0f93e35df61f42daa06dc7c910b1e7b4","9bd21c82a4a543f09ac4cb89657ca830","212f12b9d6ff45d8b5d8d297dfa53156","9f712dbc35054c13aef235348ce1a353","f8d84bf8235c45ec9b50d275f2cfc867","4d8a546735c74852947634ce252f14f7","2e302f20dc9e4937a1de9d9e0d0da7b3","b2d8210c3889402aaf1db961d4cd730f","4c24fc840b5840c7b2a516c419886882","252d9c5dfa0f46118640012285b286fa","eeed4ec9c9ce45a3bab7fd3e0d1d4ea8","5ba4f0e7d5fa419a9233aa2a4992b2ba","86caab393aa44dd5b435691285196c4d","5adfed80531b441690a0b5249c11b32c","289467336d58463db15339430e5c1f00","3a696b2695294d8995bda6871ed7df08","544f6f375b8b49a6a66b65893d66003d","fea8d1537e2e442ebdaa06366204e0a9","13dad26515bf437d8c280e32b793f09d","eed2665a7bcd4fb282df5ad004eb05a8","4b571dd45888423aadcd2e70935f55b0","b0e0e8fb7edc47b19bff9a9faab31ef7","1c15e9adfc804dc5ba65761294fd275e","f21549e660bd46fbaf03df38e546ba08","ca2b3ed6a1c848ffbf3d53ebfca2dbdd","714f4ce0c25a4f738cde857cd1d6b30d","e9413c0eaf4a44f293605decc0b286a2","82fad2f7c01d463bab4a4685da4bb3b5","99f9e09f5a474f79acf765cbc28722cd","129e7a89f2be4012bf4490a3fa2e4c56","86138644c37841e1ba73c60d459dedfb","8fe0f5c471864bc48cf66de37638442f","ca10c36b5bd54c74886863dcccba4633","b0c371c0b3024ef9857f25708fdccf13","6a11ced9206048c889c7d6ccc57a6ba2","db3086118fad4361aa92a46357af3d91","d85d82c65da14ec6b67f829c5edeea64","bc915aec907f4821a163a866e7fb8bb6","e8085db08af9497886f160b014c1307f","bd19549616904684bfc20c89b6ff3b63","cd1a75e4e84b45bfa8ed4af0f62847d9","7ab8e6c616f54b88ac34e0ba9b377f20"
,"403401a6fd8a4961ac78e66df6b9925f","ab5cfc2daaef4a4fb0ba2fbe17c60144","e5736ce72bcc46c28347051dbcd02c88","ada769094a464e5799d67dc522cd5637","bd181fd072f84a82a46d048dbae72836","e811d4e6b28844f484e569c03576443c","b21d8aa148154174b6cce2dedb60d281","bfe4ac26ee404f56ac7ae070e0034fab","ff0e9057df6946f9a590acb7431fbc71","29a8b81811d948dab9db8dbacc0a2d34","53dde5d8c4934b81b254ac78a09b5dba","157c384de31845f1a345a9b44d3ebee6"]},"id":"BueNqaiXIJCb","executionInfo":{"status":"ok","timestamp":1697806439616,"user_tz":-60,"elapsed":335108,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"53a4c245-081d-4101-8ab9-0d20d1ca0260"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n","  and should_run_async(code)\n"]},{"output_type":"stream","name":"stdout","text":["\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n","create a model card and push everything to the hub. It might take up to 1min.\n","This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n","  warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["Saving video to /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","Moviepy - Building video /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4.\n","Moviepy - Writing video /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","\n"]},{"output_type":"stream","name":"stderr","text":[]},{"output_type":"stream","name":"stdout","text":["Moviepy - Done !\n","Moviepy - video ready /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","\u001b[38;5;4mℹ Pushing repo jake-walker/a2c-PandaPickAndPlace-v3 to the Hugging Face\n","Hub\u001b[0m\n"]},{"output_type":"display_data","data":{"text/plain":["policy.optimizer.pth:   0%|          | 0.00/52.1k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"8bfe2085c7204088a1ef714838c2aadd"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["policy.pth:   0%|          | 0.00/53.4k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"896b4650b155414aaba9036d6ebce7b2"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"eeed4ec9c9ce45a3bab7fd3e0d1d4ea8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b0e0e8fb7edc47b19bff9a9faab31ef7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["a2c-PandaPickAndPlace-v3.zip:   0%|          | 0.00/124k [00:00<?, 
?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ca10c36b5bd54c74886863dcccba4633"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["vec_normalize.pkl:   0%|          | 0.00/3.02k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ab5cfc2daaef4a4fb0ba2fbe17c60144"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["\u001b[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:\n","https://huggingface.co/jake-walker/a2c-PandaPickAndPlace-v3/tree/main/\u001b[0m\n"]},{"output_type":"execute_result","data":{"text/plain":["'https://huggingface.co/jake-walker/a2c-PandaPickAndPlace-v3/tree/main/'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":21}]},{"cell_type":"markdown","metadata":{"id":"sKGbFXZq9ikN"},"source":["### Solution (optional)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J-cC-Feg9iMm"},"outputs":[],"source":["# 1 - 2\n","env_id = \"PandaPickAndPlace-v3\"\n","env = make_vec_env(env_id, n_envs=4)\n","\n","# 3\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)\n","\n","# 4\n","model = A2C(policy = \"MultiInputPolicy\",\n","            env = env,\n","            verbose=1)\n","# 5\n","model.learn(1_000_000)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-UnlKLmpg80p"},"outputs":[],"source":["# 6\n","model_name = \"a2c-PandaPickAndPlace-v3\";\n","model.save(model_name)\n","env.save(\"vec_normalize.pkl\")\n","\n","# 7\n","from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n","\n","# Load the saved statistics\n","eval_env = DummyVecEnv([lambda: gym.make(\"PandaPickAndPlace-v3\")])\n","eval_env = VecNormalize.load(\"vec_normalize.pkl\", eval_env)\n","\n","#  do not update them at test time\n","eval_env.training = False\n","# reward normalization is not needed at test 
time\n","eval_env.norm_reward = False\n","\n","# Load the agent\n","model = A2C.load(model_name)\n","\n","mean_reward, std_reward = evaluate_policy(model, eval_env)\n","\n","print(f\"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}\")\n","\n","# 8\n","package_to_hub(\n","    model=model,\n","    model_name=f\"a2c-{env_id}\",\n","    model_architecture=\"A2C\",\n","    env_id=env_id,\n","    eval_env=eval_env,\n","    repo_id=f\"ThomasSimonini/a2c-{env_id}\", # TODO: Change the username\n","    commit_message=\"Initial commit\",\n",")"]},{"cell_type":"markdown","metadata":{"id":"usatLaZ8dM4P"},"source":["See you on Unit 7! 🔥\n","## Keep learning, stay awesome 🤗"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":["tF42HvI7-gs5","nWAuOOLh-oQf","sKGbFXZq9ikN"],"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit6/unit6.ipynb","timestamp":1697458612072}]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"02592ae9ec374e17bdb94cd63f2da4bd":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":nu
ll,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"028e6e2c1a03429b8373796a2d15f967":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6616f6dcdf144fa8932aff1ba15465c9","placeholder":"​","style":"IPY_MODEL_8bf2d5b027704604af3f73ec14ddb21a","value":"Upload 5 LFS files: 100%"}},"03723069ce9b4425a22d248e7aeb78b1":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_352ea4dca3e84a10bc0961f0bc04c8ed","placeholder":"​","style":"IPY_MODEL_19bce1eac4e448cf8fff65ec32d4bdff","value":" 5/5 [00:01&lt;00:00,  
4.13it/s]"}},"049e2a4373be4f81a52be5a71516c526":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"066e7917847e4ae8b122bcd4ada26c62":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e2f0c8219f404efe82bf10e2fe4519c7","placeholder":"​","style":"IPY_MODEL_9a0dc2519e464195a25808bacdb42100","value":"vec_normalize.pkl: 
100%"}},"06ce39f472d2434b84146a451937a445":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5d05a3d491c24cc98443664571baefdf","IPY_MODEL_ab92784efd82433a980743969c68ad7d","IPY_MODEL_277962f6adf04e8a8ac0b62299c61deb"],"layout":"IPY_MODEL_2db79e59c6e8432fad82d2eb1f3fc7b9"}},"0ff9d960df92419f9fd288dd6b618071":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1407f798d5ae4d758109d725772f584b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutVie
w","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1673319fc069427288593cb2fefef641":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"19bce1eac4e448cf8fff65ec32d4bdff":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1edf516ba9744c2ea3430a450f11f199":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,
"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"20ae4498ab524acaab04f0d21cf2a8ce":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3fc0651477c24fbf81d97bd5dcb841f8","placeholder":"​","style":"IPY_MODEL_75ad83ef32ec418a94e095aebc0cb985","value":"policy.optimizer.pth: 100%"}},"277962f6adf04e8a8ac0b62299c61deb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d04fd69fbca14c11a843094501a2cf37","placeholder":"​","style":"IPY_MODEL_a9b0ed8956644ad7945dde85dd827be2","value":" 864/864 [00:00&lt;00:00, 
1.41kB/s]"}},"2db79e59c6e8432fad82d2eb1f3fc7b9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"32acd81ca6df45b6aa8b366de0499587":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"352ea4dca3e84a10bc0961f0bc04c8ed":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,
"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"396f842ece3d4b9980926f0bb7ac3ce4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_066e7917847e4ae8b122bcd4ada26c62","IPY_MODEL_fa8361afc19c460cb31259401ee38e3e","IPY_MODEL_89c1f8c6c17d4a9792c78ef20daf9ac8"],"layout":"IPY_MODEL_1edf516ba9744c2ea3430a450f11f199"}},"3999835733484e52b53d5e498ae53b7d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_posit
ion":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3fc0651477c24fbf81d97bd5dcb841f8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"40e030067ccb475cac78dc4a430736ca":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"45ed0117c2d1482a922d3922cc8fc955":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","descrip
tion_tooltip":null,"layout":"IPY_MODEL_9a9d2950aca641cdac9b87b95f3fb6b4","placeholder":"​","style":"IPY_MODEL_e14a2798b6b24a3cb6d084266ca295bb","value":"a2c-PandaReachDense-v3.zip: 100%"}},"5296d67b4b0f4cdb850d50f3d7f80e77":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5d05a3d491c24cc98443664571baefdf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c9389a3714cb4dc08064be1f9205632f","placeholder":"​","style":"IPY_MODEL_e481844b06824b0bacaf92ce8839a432","value":"pytorch_variables.pth: 
100%"}},"62f79e70b55545deb23cd27b8f48a5ec":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6616f6dcdf144fa8932aff1ba15465c9":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overf
low_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"678252b688c84e33b3bc832756ac68d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e8bc7022a6874202aa151b4db213f72c","max":45167,"min":0,"orientation":"horizontal","style":"IPY_MODEL_73c9dfddecb44fcfa87786c9d65e274e","value":45167}},"67fb52d7ed0f447f811f8782ee3ea378":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6ab04382062f4119a74bab2d1bae271e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_
fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6f6579e6aca445c6ac139fa5ca46a659":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"73c9dfddecb44fcfa87786c9d65e274e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"75ad83ef32ec418a94e095aebc0cb985":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7dd3d3c4569f4a92afaf3f044a2a6cc8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_02592ae9ec374e17bdb94cd63f2da4bd","placeholder":"​","style":"IPY_MODEL_67fb52d7ed0f447f811f8782ee3ea378","value":" 108k/108k 
[00:00&lt;00:00, 27.7kB/s]"}},"834893cddc7b4518ad9f58de74b9df26":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"89c1f8c6c17d4a9792c78ef20daf9ac8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5296d67b4b0f4cdb850d50f3d7f80e77","placeholder":"​","style":"IPY_MODEL_b073e64700c94739a22fac4388517029","value":" 2.62k/2.62k [00:00&lt;00:00, 
4.51kB/s]"}},"89ef19f39dce4674a2a6146c8eb70146":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_92efda203d24453a9a12bf051289febf","placeholder":"​","style":"IPY_MODEL_98f1895dbcd047288b3049b250db4e98","value":"policy.pth: 100%"}},"8bf2d5b027704604af3f73ec14ddb21a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8cce6df0cb3b4d188f8d92e8455bf4a2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top"
:null,"visibility":null,"width":null}},"92efda203d24453a9a12bf051289febf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"98f1895dbcd047288b3049b250db4e98":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9a0dc2519e464195a25808bacdb42100":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9a9d2950aca641cdac9b87b95f3fb6b4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0"
,"model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9da8d2763c704661932950c8e9d55050":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a856c29927664d8b9321d85b5b3cb3dc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a9b0ed8956644ad7945dde85dd827be2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_mode
l_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"aa165a5cc209400fb50c46c5669b4f5a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_028e6e2c1a03429b8373796a2d15f967","IPY_MODEL_fe209bc1ebce42139af45e4f0b4ed3c6","IPY_MODEL_03723069ce9b4425a22d248e7aeb78b1"],"layout":"IPY_MODEL_8cce6df0cb3b4d188f8d92e8455bf4a2"}},"ab92784efd82433a980743969c68ad7d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_d1852a9716694263aea37c369396a1a2","max":864,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9da8d2763c704661932950c8e9d55050","value":864}},"ad5d3851a13a4c159ccdbec4dce5d734":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_834893cddc7b4518ad9f58de74b9df26","placeholder":"​","style":"IPY_MODEL_40e030067ccb475cac78dc4a430736ca","value":" 46.4k/46.4k [00:00&lt;00:00, 
27.5kB/s]"}},"af9d0aa3438a49638d7d7e4ded636508":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1407f798d5ae4d758109d725772f584b","max":46447,"min":0,"orientation":"horizontal","style":"IPY_MODEL_1673319fc069427288593cb2fefef641","value":46447}},"b073e64700c94739a22fac4388517029":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bd79e66fd19d45298dc1d6547942cf32":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_89ef19f39dce4674a2a6146c8eb70146","IPY_MODEL_af9d0aa3438a49638d7d7e4ded636508","IPY_MODEL_ad5d3851a13a4c159ccdbec4dce5d734"],"layout":"IPY_MODEL_049e2a4373be4f81a52be5a71516c526"}},"bda773c9317c45c3a684191ff207a93c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_ver
sion":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"bf3de84002be4b77813017549727ab3b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c9389a3714cb4dc08064be1f9205632f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"ob
ject_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cf3e525363dc4c6c89daa667732a68b8":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_20ae4498ab524acaab04f0d21cf2a8ce","IPY_MODEL_678252b688c84e33b3bc832756ac68d2","IPY_MODEL_f8329a0a6dea43db9879e5d011af5fee"],"layout":"IPY_MODEL_0ff9d960df92419f9fd288dd6b618071"}},"d04fd69fbca14c11a843094501a2cf37":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d1852a9716694263aea37c369396a1a2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version"
:"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2f082ae3be04798bab8442360d8f0df":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e80750311a3c4c528162e40f5899cadf","max":108131,"min":0,"orientation":"horizontal","style":"IPY_MODEL_bda773c9317c45c3a684191ff207a93c","value":108131}},"db0467c7229944f6aed12ebe1dcd5fe7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_45ed0117c2d1482a922d3922cc8fc955","IPY_MODEL_d2f082ae3be04798bab8442360d8f0df","IPY_MODEL_7dd3d3c4569f4a92a
faf3f044a2a6cc8"],"layout":"IPY_MODEL_62f79e70b55545deb23cd27b8f48a5ec"}},"e14a2798b6b24a3cb6d084266ca295bb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e2f0c8219f404efe82bf10e2fe4519c7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e481844b06824b0bacaf92ce8839a432":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e80750311a3c4c528162e40f5899cadf":{"model_module":"@jupyter-widgets/b
ase","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e8bc7022a6874202aa151b4db213f72c":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":n
ull}},"f8329a0a6dea43db9879e5d011af5fee":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bf3de84002be4b77813017549727ab3b","placeholder":"​","style":"IPY_MODEL_a856c29927664d8b9321d85b5b3cb3dc","value":" 45.2k/45.2k [00:00&lt;00:00, 28.0kB/s]"}},"fa8361afc19c460cb31259401ee38e3e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_3999835733484e52b53d5e498ae53b7d","max":2623,"min":0,"orientation":"horizontal","style":"IPY_MODEL_32acd81ca6df45b6aa8b366de0499587","value":2623}},"fe209bc1ebce42139af45e4f0b4ed3c6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_6ab04382062f4119a74bab2d1bae271e","max":5,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6f6579e6aca445c6ac139fa5ca46a659","value":5}},"9cacf8dbc7a94fa481e1cc40676692c8":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_versio
n":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_d0f67ad35369477185dbcf6b0a7c07c3","IPY_MODEL_04bbbd03a7c94340a458d0dcd7f1bca5","IPY_MODEL_8c3c6264b6284db3957ed7f15a90a601","IPY_MODEL_70f52659a1d84dc7ac1ed4648c4de55d"],"layout":"IPY_MODEL_3158042f4a7747b083201b336c509833"}},"e1bcfc8d49874c0a87984a1519ce9a02":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9d9554707a1842c9a21b1d7df3a6e8ea","placeholder":"​","style":"IPY_MODEL_005095e28ba94fab9433cc3e46079300","value":"<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. 
</center>"}},"9c37b1a06401475892e32e64cfc7228b":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_8d515b922b3149ef997885c4a1b8f57d","placeholder":"​","style":"IPY_MODEL_76fca4ceb1b74730ad80914a5526a33a","value":""}},"7d05761914c6458abd83d3320d5e6774":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0b24db20f40e460c91fa1090d0286e80","style":"IPY_MODEL_0cd333170d704dde92b6826238507f38","value":true}},"4fe7ce97f96c4d3ea8d42decfd98c7fd":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_4f7f847624ad487ba3e8e0c136fd86ee","style":"IPY_MODEL_b8228ef61d2147368167e35c75a14b2b","tooltip":""}},"3e23a54fdb7044d38aaf172664698871":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9b9e93708a824114abdb56ffb3a6bbdd","placeholder":"​","style":"IPY_MODEL_9dd1a10e441c4a3da2c870f40c55d047","value":"\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>"}},"3158042f4a7747b083201b336c509833":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"9d9554707a1842c9a21b1d7df3a6e8ea":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflo
w_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"005095e28ba94fab9433cc3e46079300":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8d515b922b3149ef997885c4a1b8f57d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"76fca4ceb1b74730ad80914a5526a33a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0b24db20f40e460c91fa1090d0286e80":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0cd333170d704dde92b6826238507f38":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4f7f847624ad487ba3e8e0c136fd86ee":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid
_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b8228ef61d2147368167e35c75a14b2b":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"9b9e93708a824114abdb56ffb3a6bbdd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9dd1a10e441c4a3da2c870f40c55d047":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyl
eModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5f30f9cbb28f4826af8b9c9b87ec5b88":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_18bbbe029e1f42bea69347d5de28d2cb","placeholder":"​","style":"IPY_MODEL_ffa9936e6f3846b9b215c6e03396106e","value":"Connecting..."}},"18bbbe029e1f42bea69347d5de28d2cb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffa9936e6f3846b9b215c6e03396106e":{"model_module":"@jupyter-widgets/controls","mo
del_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d0f67ad35369477185dbcf6b0a7c07c3":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a0f8c961d3c64db08c3555bd566fe955","placeholder":"​","style":"IPY_MODEL_db52efeddaff4046aad1067fefe78656","value":"Token is valid (permission: write)."}},"04bbbd03a7c94340a458d0dcd7f1bca5":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d7a899e7eaa8452fb8caf7f064ca96c3","placeholder":"​","style":"IPY_MODEL_66a44c2579634b54860458073bec84be","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"8c3c6264b6284db3957ed7f15a90a601":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4e1d4dfdb773409d8f24f035608de8ab","placeholder":"​","style":"IPY_MODEL_d8bdb828b07b442eb51a0499c552dc5a","value":"Your token has been saved to /root/.cache/huggingface/token"}},"70f52659a1d84dc7ac1ed4648c4de55d":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_32c2bbf73b5549e28273810c28419711","placeholder":"​","style":"IPY_MODEL_fbdfe5c1594543138016fb9cd417930e","value":"Login 
successful"}},"a0f8c961d3c64db08c3555bd566fe955":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"db52efeddaff4046aad1067fefe78656":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d7a899e7eaa8452fb8caf7f064ca96c3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"66a44c2579634b54860458073bec84be":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4e1d4dfdb773409d8f24f035608de8ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d8bdb828b07b442eb51a0499c552dc5a":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"32c2bbf73b5549e28273810c28419711":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fbdfe5c1594543138016fb9cd417930e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8bfe2085c7204088a1ef714838c2aadd":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4658324a43be490caf922ead1f9d2d1d","IPY_MODEL_4d717050a24841ee8069c6653117b3dc","IPY_MODEL_ce714cf5cac0486fad1eb8cc49d53e0b"],"layout":"IPY_MODEL_c7fe3d6d2f5541a1a0ec140a7741995d"}},"4658324a43be490caf922ead1f9d2d1d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4da35dc2fa704607b4e2504b6af5db88","placeholder":"​","style":"IPY_MODEL_9bfd6667c1fb449392d491dae77b59ee","value":"policy.optimizer.pth: 100%"}},"4d717050a24841ee8069c6653117b3dc":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d07efce45fa4670ad96f9fac0dab684","max":52079,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5ef098a570de4d6e87bf8df85a33050e","value":52079}},"ce714cf5cac0486fad1eb8cc49d53e0b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":
"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f454eaeac4ac494c96fc9f7d438e9618","placeholder":"​","style":"IPY_MODEL_fc7ee0566b384ab9a135b89739531326","value":" 52.1k/52.1k [00:00&lt;00:00, 24.9kB/s]"}},"c7fe3d6d2f5541a1a0ec140a7741995d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4da35dc2fa704607b4e2504b6af5db88":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_c
ontent":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9bfd6667c1fb449392d491dae77b59ee":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7d07efce45fa4670ad96f9fac0dab684":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5ef098a570de4d6e87bf8df85a33050e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"P
rogressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f454eaeac4ac494c96fc9f7d438e9618":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc7ee0566b384ab9a135b89739531326":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"896b4650b155414aaba9036d6ebce7b2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxVie
w","box_style":"","children":["IPY_MODEL_0f93e35df61f42daa06dc7c910b1e7b4","IPY_MODEL_9bd21c82a4a543f09ac4cb89657ca830","IPY_MODEL_212f12b9d6ff45d8b5d8d297dfa53156"],"layout":"IPY_MODEL_9f712dbc35054c13aef235348ce1a353"}},"0f93e35df61f42daa06dc7c910b1e7b4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8d84bf8235c45ec9b50d275f2cfc867","placeholder":"​","style":"IPY_MODEL_4d8a546735c74852947634ce252f14f7","value":"policy.pth: 100%"}},"9bd21c82a4a543f09ac4cb89657ca830":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e302f20dc9e4937a1de9d9e0d0da7b3","max":53359,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b2d8210c3889402aaf1db961d4cd730f","value":53359}},"212f12b9d6ff45d8b5d8d297dfa53156":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4c24fc840b5840c7b2a516c419886882","placeholder":"​","style":"IPY_MODEL_252d9c5dfa0f46118640012285b286fa","value":" 
53.4k/53.4k [00:00&lt;00:00, 25.4kB/s]"}},"9f712dbc35054c13aef235348ce1a353":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f8d84bf8235c45ec9b50d275f2cfc867":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overf
low":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d8a546735c74852947634ce252f14f7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e302f20dc9e4937a1de9d9e0d0da7b3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2d8210c3889402aaf1db961d4cd730f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4c24fc840b5840
c7b2a516c419886882":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"252d9c5dfa0f46118640012285b286fa":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eeed4ec9c9ce45a3bab7fd3e0d1d4ea8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5ba4f0e7d5fa419a9233aa2a4992b2ba","IPY_MODEL_86caab393aa44dd5b435691285196c4d","IPY_MODEL_5adfed80531b441690a0b5249c11b32c"],"layout":"IPY_MODEL_2894
67336d58463db15339430e5c1f00"}},"5ba4f0e7d5fa419a9233aa2a4992b2ba":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3a696b2695294d8995bda6871ed7df08","placeholder":"​","style":"IPY_MODEL_544f6f375b8b49a6a66b65893d66003d","value":"Upload 5 LFS files: 100%"}},"86caab393aa44dd5b435691285196c4d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fea8d1537e2e442ebdaa06366204e0a9","max":5,"min":0,"orientation":"horizontal","style":"IPY_MODEL_13dad26515bf437d8c280e32b793f09d","value":5}},"5adfed80531b441690a0b5249c11b32c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_eed2665a7bcd4fb282df5ad004eb05a8","placeholder":"​","style":"IPY_MODEL_4b571dd45888423aadcd2e70935f55b0","value":" 5/5 [00:01&lt;00:00,  
3.15it/s]"}},"289467336d58463db15339430e5c1f00":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a696b2695294d8995bda6871ed7df08":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"544f6f375b8b49a6a66b65893d66003d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fea8d1537e2e442ebdaa06366204e0a9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"13dad26515bf437d8c280e32b793f09d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eed2665a7bcd4fb282df5ad004eb05a8":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4b571dd45888423aadcd2e70935f55b0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b0e0e8fb7edc47b19bff9a9faab31ef7":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1c15e9adfc804dc5ba65761294fd275e","IPY_MODEL_f21549e660bd46fbaf03df38e546ba08","IPY_MODEL_ca2b3ed6a1c848ffbf3d53ebfca2dbdd"],"layout":"IPY_MODEL_714f4ce0c25a4f738cde857cd1d6b30d"
}},"1c15e9adfc804dc5ba65761294fd275e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e9413c0eaf4a44f293605decc0b286a2","placeholder":"​","style":"IPY_MODEL_82fad2f7c01d463bab4a4685da4bb3b5","value":"pytorch_variables.pth: 100%"}},"f21549e660bd46fbaf03df38e546ba08":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_99f9e09f5a474f79acf765cbc28722cd","max":864,"min":0,"orientation":"horizontal","style":"IPY_MODEL_129e7a89f2be4012bf4490a3fa2e4c56","value":864}},"ca2b3ed6a1c848ffbf3d53ebfca2dbdd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_86138644c37841e1ba73c60d459dedfb","placeholder":"​","style":"IPY_MODEL_8fe0f5c471864bc48cf66de37638442f","value":" 864/864 [00:00&lt;00:00, 
1.32kB/s]"}},"714f4ce0c25a4f738cde857cd1d6b30d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e9413c0eaf4a44f293605decc0b286a2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"82fad2f7c01d463bab4a4685da4bb3b5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"99f9e09f5a474f79acf765cbc28722cd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"129e7a89f2be4012bf4490a3fa2e4c56":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"86138644c37841e1ba73c60d459dedfb":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8fe0f5c471864bc48cf66de37638442f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ca10c36b5bd54c74886863dcccba4633":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b0c371c0b3024ef9857f25708fdccf13","IPY_MODEL_6a11ced9206048c889c7d6ccc57a6ba2","IPY_MODEL_db3086118fad4361aa92a46357af3d91"],"layout":"IPY_MODEL_d85d82c65da14ec6b67f829c5edeea64"
}},"b0c371c0b3024ef9857f25708fdccf13":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bc915aec907f4821a163a866e7fb8bb6","placeholder":"​","style":"IPY_MODEL_e8085db08af9497886f160b014c1307f","value":"a2c-PandaPickAndPlace-v3.zip: 100%"}},"6a11ced9206048c889c7d6ccc57a6ba2":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_bd19549616904684bfc20c89b6ff3b63","max":124467,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cd1a75e4e84b45bfa8ed4af0f62847d9","value":124467}},"db3086118fad4361aa92a46357af3d91":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7ab8e6c616f54b88ac34e0ba9b377f20","placeholder":"​","style":"IPY_MODEL_403401a6fd8a4961ac78e66df6b9925f","value":" 124k/124k [00:00&lt;00:00, 
28.0kB/s]"}},"d85d82c65da14ec6b67f829c5edeea64":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc915aec907f4821a163a866e7fb8bb6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e8085db08af9497886f160b014c1307f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bd19549616904684bfc20c89b6ff3b63":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd1a75e4e84b45bfa8ed4af0f62847d9":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7ab8e6c616f54b88ac34e0ba9b377f20":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"403401a6fd8a4961ac78e66df6b9925f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ab5cfc2daaef4a4fb0ba2fbe17c60144":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e5736ce72bcc46c28347051dbcd02c88","IPY_MODEL_ada769094a464e5799d67dc522cd5637","IPY_MODEL_bd181fd072f84a82a46d048dbae72836"],"layout":"IPY_MODEL_e811d4e6b28844f484e569c03576443c"
}},"e5736ce72bcc46c28347051dbcd02c88":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b21d8aa148154174b6cce2dedb60d281","placeholder":"​","style":"IPY_MODEL_bfe4ac26ee404f56ac7ae070e0034fab","value":"vec_normalize.pkl: 100%"}},"ada769094a464e5799d67dc522cd5637":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff0e9057df6946f9a590acb7431fbc71","max":3023,"min":0,"orientation":"horizontal","style":"IPY_MODEL_29a8b81811d948dab9db8dbacc0a2d34","value":3023}},"bd181fd072f84a82a46d048dbae72836":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_53dde5d8c4934b81b254ac78a09b5dba","placeholder":"​","style":"IPY_MODEL_157c384de31845f1a345a9b44d3ebee6","value":" 3.02k/3.02k [00:00&lt;00:00, 
5.75kB/s]"}},"e811d4e6b28844f484e569c03576443c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b21d8aa148154174b6cce2dedb60d281":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bfe4ac26ee404f56ac7ae070e0034fab":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ff0e9057df6946f9a590acb7431fbc71":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"29a8b81811d948dab9db8dbacc0a2d34":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"53dde5d8c4934b81b254ac78a09b5dba":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"157c384de31845f1a345a9b44d3ebee6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
diff --git a/HF DeepRL Course/Unit7 - Multi-Agents.ipynb b/HF DeepRL Course/Unit7 - Multi-Agents.ipynb
new file mode 100644
index 0000000..c645c14
--- /dev/null
+++ b/HF DeepRL Course/Unit7 - Multi-Agents.ipynb	
@@ -0,0 +1,1355 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "128c7ef7-74f6-49b0-b29b-1b398d559ceb",
+   "metadata": {},
+   "source": [
+    "# AI vs AI Soccer\n",
+    "\n",
+    "Instructions: https://huggingface.co/learn/deep-rl-course/unit7/hands-on"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "305935aa-e84d-4ddf-9067-47445db0fdaa",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Step 0: Install MLAgents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "92f35f0d-73e3-4556-9f51-076d6391438e",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into 'ml-agents'...\n",
+      "remote: Enumerating objects: 91591, done.\u001b[K\n",
+      "remote: Counting objects: 100% (2481/2481), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (999/999), done.\u001b[K\n",
+      "remote: Total 91591 (delta 1404), reused 2060 (delta 1157), pack-reused 89110\u001b[K\n",
+      "Receiving objects: 100% (91591/91591), 2.87 GiB | 1.92 MiB/s, done.\n",
+      "Resolving deltas: 100% (66541/66541), done.\n",
+      "Obtaining file:///home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents-envs\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting cloudpickle (from mlagents-envs==1.1.0.dev0)\n",
+      "  Obtaining dependency information for cloudpickle from https://files.pythonhosted.org/packages/96/43/dae06432d0c4b1dc9e9149ad37b4ca8384cf6eb7700cd9215b177b914f0a/cloudpickle-3.0.0-py3-none-any.whl.metadata\n",
+      "  Downloading cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)\n",
+      "Collecting grpcio<=1.48.2,>=1.11.0 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Downloading grpcio-1.48.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting Pillow>=4.2.1 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Obtaining dependency information for Pillow>=4.2.1 from https://files.pythonhosted.org/packages/e5/b9/5c6ad3241f1ccca4b781dfeddbab2dac4480f95aedc351a0e60c9f4c8aa9/Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata\n",
+      "  Downloading Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.5 kB)\n",
+      "Collecting protobuf<3.20,>=3.6 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: pyyaml>=3.1.0 in ./.venv/lib/python3.10/site-packages (from mlagents-envs==1.1.0.dev0) (6.0.1)\n",
+      "Collecting gym>=0.21.0 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Using cached gym-0.26.2-py3-none-any.whl\n",
+      "Collecting pettingzoo==1.15.0 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Downloading PettingZoo-1.15.0.tar.gz (756 kB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m756.7/756.7 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hCollecting numpy<1.24.0,>=1.21.2 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting filelock>=3.4.0 (from mlagents-envs==1.1.0.dev0)\n",
+      "  Obtaining dependency information for filelock>=3.4.0 from https://files.pythonhosted.org/packages/5e/5d/97afbafd9d584ff1b45fcb354a479a3609bd97f912f8f1f6c563cb1fae21/filelock-3.12.4-py3-none-any.whl.metadata\n",
+      "  Downloading filelock-3.12.4-py3-none-any.whl.metadata (2.8 kB)\n",
+      "Requirement already satisfied: six>=1.5.2 in ./.venv/lib/python3.10/site-packages (from grpcio<=1.48.2,>=1.11.0->mlagents-envs==1.1.0.dev0) (1.16.0)\n",
+      "Collecting gym-notices>=0.0.4 (from gym>=0.21.0->mlagents-envs==1.1.0.dev0)\n",
+      "  Using cached gym_notices-0.0.8-py3-none-any.whl (3.0 kB)\n",
+      "Downloading filelock-3.12.4-py3-none-any.whl (11 kB)\n",
+      "Downloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n",
+      "Downloading Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (3.6 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hBuilding wheels for collected packages: pettingzoo\n",
+      "  Building wheel for pettingzoo (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25h  Created wheel for pettingzoo: filename=PettingZoo-1.15.0-py3-none-any.whl size=875632 sha256=0224ab58fef6069996c1020bb8ae12ec22385c575ed351da5f7d5843fd96ade7\n",
+      "  Stored in directory: /home/jakewalker/.cache/pip/wheels/e3/35/ac/76984cb1c12902d190c818d57c43d25c3f9281591a640ccd13\n",
+      "Successfully built pettingzoo\n",
+      "Installing collected packages: gym-notices, protobuf, Pillow, numpy, grpcio, filelock, cloudpickle, gym, pettingzoo, mlagents-envs\n",
+      "  Running setup.py develop for mlagents-envs\n",
+      "Successfully installed Pillow-10.1.0 cloudpickle-3.0.0 filelock-3.12.4 grpcio-1.48.2 gym-0.26.2 gym-notices-0.0.8 mlagents-envs-1.1.0.dev0 numpy-1.23.5 pettingzoo-1.15.0 protobuf-3.19.6\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "Obtaining file:///home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents\n",
+      "  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
+      "\u001b[?25hRequirement already satisfied: grpcio<=1.48.2,>=1.11.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.48.2)\n",
+      "Collecting h5py>=2.9.0 (from mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for h5py>=2.9.0 from https://files.pythonhosted.org/packages/3b/d3/ecb4b3d2ec2c84132987e5f12ab1408f455bec1d90cd5bc408ebf37800f5/h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n",
+      "  Downloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)\n",
+      "Requirement already satisfied: mlagents_envs==1.1.0.dev0 in ./ml-agents/ml-agents-envs (from mlagents==1.1.0.dev0) (1.1.0.dev0)\n",
+      "Requirement already satisfied: numpy<1.24.0,>=1.21.2 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.23.5)\n",
+      "Requirement already satisfied: Pillow>=4.2.1 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (10.1.0)\n",
+      "Requirement already satisfied: protobuf<3.20,>=3.6 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (3.19.6)\n",
+      "Requirement already satisfied: pyyaml>=3.1.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (6.0.1)\n",
+      "Collecting torch>=1.13.1 (from mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for torch>=1.13.1 from https://files.pythonhosted.org/packages/6d/13/b5e8bacd980b2195f8a1741ce11cbb9146568607795d5e4ff510dcff1064/torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl.metadata\n",
+      "  Downloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)\n",
+      "Collecting tensorboard>=2.14 (from mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for tensorboard>=2.14 from https://files.pythonhosted.org/packages/69/38/fb2ac9c4c8efbe020ae88f6772be87d51ef18526ac541fc3393786b7c45a/tensorboard-2.15.0-py3-none-any.whl.metadata\n",
+      "  Downloading tensorboard-2.15.0-py3-none-any.whl.metadata (1.7 kB)\n",
+      "Requirement already satisfied: six>=1.16 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.16.0)\n",
+      "Requirement already satisfied: attrs>=19.3.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (23.1.0)\n",
+      "Collecting huggingface_hub>=0.14 (from mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for huggingface_hub>=0.14 from https://files.pythonhosted.org/packages/ef/b5/b6107bd65fa4c96fdf00e4733e2fe5729bb9e5e09997f63074bb43d3ab28/huggingface_hub-0.18.0-py3-none-any.whl.metadata\n",
+      "  Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n",
+      "Collecting onnx==1.12.0 (from mlagents==1.1.0.dev0)\n",
+      "  Downloading onnx-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting cattrs<1.7,>=1.1.0 (from mlagents==1.1.0.dev0)\n",
+      "  Downloading cattrs-1.5.0-py3-none-any.whl (19 kB)\n",
+      "Requirement already satisfied: cloudpickle in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (3.0.0)\n",
+      "Requirement already satisfied: gym>=0.21.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (0.26.2)\n",
+      "Requirement already satisfied: pettingzoo==1.15.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (1.15.0)\n",
+      "Requirement already satisfied: filelock>=3.4.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (3.12.4)\n",
+      "Requirement already satisfied: typing-extensions>=3.6.2.1 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0->mlagents==1.1.0.dev0) (4.8.0)\n",
+      "Collecting fsspec>=2023.5.0 (from huggingface_hub>=0.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for fsspec>=2023.5.0 from https://files.pythonhosted.org/packages/e8/f6/3eccfb530aac90ad1301c582da228e4763f19e719ac8200752a4841b0b2d/fsspec-2023.10.0-py3-none-any.whl.metadata\n",
+      "  Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n",
+      "Requirement already satisfied: requests in ./.venv/lib/python3.10/site-packages (from huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2.31.0)\n",
+      "Collecting tqdm>=4.42.1 (from huggingface_hub>=0.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for tqdm>=4.42.1 from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata\n",
+      "  Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: packaging>=20.9 in ./.venv/lib/python3.10/site-packages (from huggingface_hub>=0.14->mlagents==1.1.0.dev0) (23.2)\n",
+      "Collecting absl-py>=0.4 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for absl-py>=0.4 from https://files.pythonhosted.org/packages/01/e4/dc0a1dcc4e74e08d7abedab278c795eef54a224363bb18f5692f416d834f/absl_py-2.0.0-py3-none-any.whl.metadata\n",
+      "  Downloading absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)\n",
+      "Collecting google-auth<3,>=1.6.3 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for google-auth<3,>=1.6.3 from https://files.pythonhosted.org/packages/39/7c/2e4fa55a99f83ef9ef229ac5d59c44ceb90e2d0145711590c0fa39669f32/google_auth-2.23.3-py2.py3-none-any.whl.metadata\n",
+      "  Downloading google_auth-2.23.3-py2.py3-none-any.whl.metadata (4.2 kB)\n",
+      "Collecting google-auth-oauthlib<2,>=0.5 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for google-auth-oauthlib<2,>=0.5 from https://files.pythonhosted.org/packages/ce/33/a907b4b67245647746dde8d61e1643ef5d210c88e090d491efd89eff9f95/google_auth_oauthlib-1.1.0-py2.py3-none-any.whl.metadata\n",
+      "  Downloading google_auth_oauthlib-1.1.0-py2.py3-none-any.whl.metadata (2.7 kB)\n",
+      "Collecting markdown>=2.6.8 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for markdown>=2.6.8 from https://files.pythonhosted.org/packages/bb/c1/50caaec6cadc1c6adc8fe351e03bd646d6e4dd17f55fca0f4c8d7ea8d3e9/Markdown-3.5-py3-none-any.whl.metadata\n",
+      "  Downloading Markdown-3.5-py3-none-any.whl.metadata (7.1 kB)\n",
+      "Requirement already satisfied: setuptools>=41.0.0 in ./.venv/lib/python3.10/site-packages (from tensorboard>=2.14->mlagents==1.1.0.dev0) (68.2.0)\n",
+      "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for tensorboard-data-server<0.8.0,>=0.7.0 from https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata\n",
+      "  Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n",
+      "Collecting werkzeug>=1.0.1 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for werkzeug>=1.0.1 from https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl.metadata\n",
+      "  Downloading werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)\n",
+      "Collecting sympy (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n",
+      "Collecting networkx (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for networkx from https://files.pythonhosted.org/packages/f6/eb/5585c96636bbb2755865c31d83a19dd220ef88e716df4659dacb86e009cc/networkx-3.2-py3-none-any.whl.metadata\n",
+      "  Downloading networkx-3.2-py3-none-any.whl.metadata (5.2 kB)\n",
+      "Requirement already satisfied: jinja2 in ./.venv/lib/python3.10/site-packages (from torch>=1.13.1->mlagents==1.1.0.dev0) (3.1.2)\n",
+      "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for nvidia-cudnn-cu12==8.9.2.26 from https://files.pythonhosted.org/packages/ff/74/a2e2be7fb83aaedec84f391f082cf765dfb635e7caa9b49065f73e4835d8/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata\n",
+      "  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n",
+      "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:03\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:03\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-nccl-cu12==2.18.1 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n",
+      "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
+      "\u001b[2K     \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting triton==2.1.0 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for triton==2.1.0 from https://files.pythonhosted.org/packages/4d/22/91a8af421c8a8902dde76e6ef3db01b258af16c53d81e8c0d0dc13900a9e/triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata\n",
+      "  Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n",
+      "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for nvidia-nvjitlink-cu12 from https://files.pythonhosted.org/packages/45/de/885b6d3e1fa07bf19124076b348d3cf30f68051f813cba99e103f53d2f75/nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl.metadata\n",
+      "  Downloading nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting cachetools<6.0,>=2.0.0 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Obtaining dependency information for cachetools<6.0,>=2.0.0 from https://files.pythonhosted.org/packages/a2/91/2d843adb9fbd911e0da45fbf6f18ca89d07a087c3daa23e955584f90ebf4/cachetools-5.3.2-py3-none-any.whl.metadata\n",
+      "  Downloading cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)\n",
+      "Collecting pyasn1-modules>=0.2.1 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Using cached pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n",
+      "Collecting rsa<5,>=3.1.4 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Using cached rsa-4.9-py3-none-any.whl (34 kB)\n",
+      "Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Using cached requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)\n",
+      "Requirement already satisfied: gym-notices>=0.0.4 in ./.venv/lib/python3.10/site-packages (from gym>=0.21.0->mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (0.0.8)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (3.3.1)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (3.4)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2.0.7)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2023.7.22)\n",
+      "Requirement already satisfied: MarkupSafe>=2.1.1 in ./.venv/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard>=2.14->mlagents==1.1.0.dev0) (2.1.3)\n",
+      "Collecting mpmath>=0.19 (from sympy->torch>=1.13.1->mlagents==1.1.0.dev0)\n",
+      "  Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
+      "Collecting pyasn1<0.6.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Using cached pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n",
+      "Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.14->mlagents==1.1.0.dev0)\n",
+      "  Using cached oauthlib-3.2.2-py3-none-any.whl (151 kB)\n",
+      "Downloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hDownloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading tensorboard-2.15.0-py3-none-any.whl (5.6 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m844.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m eta \u001b[36m0:00:01\u001b[0m[36m0:00:07\u001b[0m\n",
+      "\u001b[?25hDownloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:06\u001b[0m\n",
+      "\u001b[?25hDownloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading absl_py-2.0.0-py3-none-any.whl (130 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading google_auth-2.23.3-py2.py3-none-any.whl (182 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading google_auth_oauthlib-1.1.0-py2.py3-none-any.whl (19 kB)\n",
+      "Downloading Markdown-3.5-py3-none-any.whl (101 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hUsing cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
+      "Downloading werkzeug-3.0.1-py3-none-any.whl (226 kB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading networkx-3.2-py3-none-any.whl (1.6 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hDownloading cachetools-5.3.2-py3-none-any.whl (9.3 kB)\n",
+      "Downloading nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl (20.5 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.5/20.5 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: mpmath, werkzeug, triton, tqdm, tensorboard-data-server, sympy, pyasn1, onnx, oauthlib, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, networkx, markdown, h5py, fsspec, cattrs, cachetools, absl-py, rsa, requests-oauthlib, pyasn1-modules, nvidia-cusparse-cu12, nvidia-cudnn-cu12, huggingface_hub, nvidia-cusolver-cu12, google-auth, torch, google-auth-oauthlib, tensorboard, mlagents\n",
+      "  Running setup.py develop for mlagents\n",
+      "Successfully installed absl-py-2.0.0 cachetools-5.3.2 cattrs-1.5.0 fsspec-2023.10.0 google-auth-2.23.3 google-auth-oauthlib-1.1.0 h5py-3.10.0 huggingface_hub-0.18.0 markdown-3.5 mlagents-1.1.0.dev0 mpmath-1.3.0 networkx-3.2 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.18.1 nvidia-nvjitlink-cu12-12.3.52 nvidia-nvtx-cu12-12.1.105 oauthlib-3.2.2 onnx-1.12.0 pyasn1-0.5.0 pyasn1-modules-0.3.0 requests-oauthlib-1.3.1 rsa-4.9 sympy-1.12 tensorboard-2.15.0 tensorboard-data-server-0.7.2 torch-2.1.0 tqdm-4.66.1 triton-2.1.0 werkzeug-3.0.1\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "Requirement already satisfied: torch in ./.venv/lib/python3.10/site-packages (2.1.0)\n",
+      "Requirement already satisfied: onnx==1.12.0 in ./.venv/lib/python3.10/site-packages (1.12.0)\n",
+      "Requirement already satisfied: numpy>=1.16.6 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (1.23.5)\n",
+      "Requirement already satisfied: protobuf<=3.20.1,>=3.12.2 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (3.19.6)\n",
+      "Requirement already satisfied: typing-extensions>=3.6.2.1 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (4.8.0)\n",
+      "Requirement already satisfied: filelock in ./.venv/lib/python3.10/site-packages (from torch) (3.12.4)\n",
+      "Requirement already satisfied: sympy in ./.venv/lib/python3.10/site-packages (from torch) (1.12)\n",
+      "Requirement already satisfied: networkx in ./.venv/lib/python3.10/site-packages (from torch) (3.2)\n",
+      "Requirement already satisfied: jinja2 in ./.venv/lib/python3.10/site-packages (from torch) (3.1.2)\n",
+      "Requirement already satisfied: fsspec in ./.venv/lib/python3.10/site-packages (from torch) (2023.10.0)\n",
+      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n",
+      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n",
+      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n",
+      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in ./.venv/lib/python3.10/site-packages (from torch) (8.9.2.26)\n",
+      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.3.1)\n",
+      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in ./.venv/lib/python3.10/site-packages (from torch) (11.0.2.54)\n",
+      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in ./.venv/lib/python3.10/site-packages (from torch) (10.3.2.106)\n",
+      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in ./.venv/lib/python3.10/site-packages (from torch) (11.4.5.107)\n",
+      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.0.106)\n",
+      "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in ./.venv/lib/python3.10/site-packages (from torch) (2.18.1)\n",
+      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n",
+      "Requirement already satisfied: triton==2.1.0 in ./.venv/lib/python3.10/site-packages (from torch) (2.1.0)\n",
+      "Requirement already satisfied: nvidia-nvjitlink-cu12 in ./.venv/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.3.52)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib/python3.10/site-packages (from jinja2->torch) (2.1.3)\n",
+      "Requirement already satisfied: mpmath>=0.19 in ./.venv/lib/python3.10/site-packages (from sympy->torch) (1.3.0)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Clone ML-Agents only when the checkout is missing so the cell can be re-run\n",
+    "# (a second plain `git clone` into an existing directory fails).\n",
+    "!test -d ml-agents || git clone https://github.com/Unity-Technologies/ml-agents\n",
+    "# %pip installs into the environment of the running kernel, whereas !pip uses\n",
+    "# whichever pip happens to be first on PATH.\n",
+    "%pip install -e ./ml-agents/ml-agents-envs\n",
+    "%pip install -e ./ml-agents/ml-agents\n",
+    "%pip install torch onnx==1.12.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6a6054c6-3d59-4ffb-ade9-83740e7c18b5",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2023-10-25 10:44:43--  https://docs.google.com/uc?export=download&confirm=&id=1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL\n",
+      "Resolving docs.google.com (docs.google.com)... 142.250.200.46, 2a00:1450:4009:823::200e\n",
+      "Connecting to docs.google.com (docs.google.com)|142.250.200.46|:443... connected.\n",
+      "HTTP request sent, awaiting response... 303 See Other\n",
+      "Location: https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9p0egu57r3s8rkm6p23iualfupmjc5ra/1698227025000/09764732090272539193/*/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL?e=download&uuid=273505f9-1676-4126-8bda-65e629c6560e [following]\n",
+      "Warning: wildcards not supported in HTTP.\n",
+      "--2023-10-25 10:44:43--  https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9p0egu57r3s8rkm6p23iualfupmjc5ra/1698227025000/09764732090272539193/*/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL?e=download&uuid=273505f9-1676-4126-8bda-65e629c6560e\n",
+      "Resolving doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)... 142.250.200.1, 2a00:1450:4009:822::2001\n",
+      "Connecting to doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)|142.250.200.1|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 36963480 (35M) [application/x-zip-compressed]\n",
+      "Saving to: ‘./SoccerTwos.zip’\n",
+      "\n",
+      "./SoccerTwos.zip    100%[===================>]  35.25M  13.6MB/s    in 2.6s    \n",
+      "\n",
+      "2023-10-25 10:44:46 (13.6 MB/s) - ‘./SoccerTwos.zip’ saved [36963480/36963480]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The inner wget fetches the Google Drive confirm token, so it must request the\n",
+    "# same file id as the download itself; with a placeholder id the token is empty.\n",
+    "!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies 'https://docs.google.com/uc?export=download&id=1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL\" -O ./SoccerTwos.zip && rm -rf /tmp/cookies.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "11d60d74-33fb-4144-8353-600151ceccc4",
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Archive:  ./SoccerTwos.zip\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/app.info  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/boot.config  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/globalgamemanagers  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/globalgamemanagers.assets  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/level0  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/level0.resS  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Assembly-CSharp.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Google.Protobuf.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Grpc.Core.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Mono.Security.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/mscorlib.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/netstandard.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Newtonsoft.Json.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.ComponentModel.Composition.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Configuration.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Core.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Data.DataSetExtensions.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Data.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Drawing.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.EnterpriseServices.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Interactive.Async.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Abstractions.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Abstractions.TestingHelpers.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Compression.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Compression.FileSystem.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Net.Http.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Numerics.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Runtime.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Runtime.Serialization.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Security.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.ServiceModel.Internals.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Transactions.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Xml.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Xml.Linq.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.BurstBLAS.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.ONNX.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Mdb.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Pdb.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Rocks.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Unsafe.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.InputSystem.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Mathematics.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.CommunicatorObjects.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.Extensions.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.Extensions.Input.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.TextMeshPro.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AccessibilityModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AIModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AndroidJNIModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AnimationModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AssetBundleModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AudioModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClothModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClusterInputModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClusterRendererModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.CoreModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.CrashReportingModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.DirectorModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.DSPGraphModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GameCenterModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GIModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GridModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.HotReloadModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ImageConversionModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.IMGUIModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.InputLegacyModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.InputModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.JSONSerializeModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.LocalizationModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ParticleSystemModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.PerformanceReportingModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.Physics2DModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.PhysicsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ProfilerModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.RuntimeInitializeOnLoadManagerInitializerModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ScreenCaptureModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SharedInternalsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SpriteMaskModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SpriteShapeModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.StreamingModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SubstanceModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SubsystemsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TerrainModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TerrainPhysicsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextCoreFontEngineModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextCoreTextEngineModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextRenderingModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TilemapModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TLSModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UI.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIElementsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIElementsNativeModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UmbraModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UNETModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityAnalyticsCommonModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityAnalyticsModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityConnectModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityCurlModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityTestProtocolModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestAssetBundleModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestAudioModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestTextureModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestWWWModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VehiclesModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VFXModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VideoModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VirtualTexturingModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VRModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.WindModule.dll  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.XRModule.dll  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/config  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/Browsers/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/Browsers/Compat.browser  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/DefaultWsdlHelpGenerator.aspx  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/machine.config  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/settings.map  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/web.config  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/Browsers/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/Browsers/Compat.browser  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/DefaultWsdlHelpGenerator.aspx  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/machine.config  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/settings.map  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/web.config  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/Browsers/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/Browsers/Compat.browser  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/DefaultWsdlHelpGenerator.aspx  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/machine.config  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/settings.map  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/web.config  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/browscap.ini  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/config  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/mconfig/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/mconfig/config.xml  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libmono-native.so  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libmonobdwgc-2.0.so  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libMonoPosixHelper.so  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/libgrpc_csharp_ext.x64.so  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/lib_burst_generated.so  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/unity default resources  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/UnityPlayer.png  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/unity_builtin_extra  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/resources.assets  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/resources.assets.resS  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/RuntimeInitializeOnLoads.json  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/ScriptingAssemblies.json  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/sharedassets0.assets  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/sharedassets0.assets.resS  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64  \n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/UnityPlayer.so  \n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/\n",
+      "   creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/Plugins/\n",
+      "  inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/Plugins/lib_burst_generated.txt  \n"
+     ]
+    }
+   ],
+   "source": [
+    "!mkdir -p ./ml-agents/training-envs-executables/SoccerTwos\n",
+    "!unzip ./SoccerTwos.zip -d ./ml-agents/training-envs-executables/SoccerTwos"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa1b69a5-c232-411e-a395-65f7ee31e1ce",
+   "metadata": {},
+   "source": [
+    "## Step 1: Understand the environment\n",
+    "\n",
+    "- **Reward Function:** $1 - \\text{accumulated time penalty}$ when the ball enters the opponent's goal, where the accumulated time penalty is incremented by $1 \\div \\text{max step}$ every fixed update and is reset to $0$ at the beginning of the episode; $-1$ when the ball enters the team's own goal.\n",
+    "- **Observation Space:** composed of vectors of size 336:\n",
+    "    - 11 ray-casts forward distributed over 120 degrees (264 state dimensions)\n",
+    "    - 3 ray-casts backward distributed over 90 degrees (72 state dimensions)\n",
+    "    - Both of these ray-casts can detect 6 objects:\n",
+    "        - Ball\n",
+    "        - Blue Goal\n",
+    "        - Purple Goal\n",
+    "        - Wall\n",
+    "        - Blue Agent\n",
+    "        - Purple Agent\n",
+    "- **Action Space:**\n",
+    "    - Forward motion: Up/Down\n",
+    "    - Sideways motion: Left/Right\n",
+    "    - Rotation: Left/Right"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4bc3cfd3-c34d-45f7-932a-5f3bb65d070e",
+   "metadata": {},
+   "source": [
+    "## Step 2: Understand MA-POCA\n",
+    "\n",
+    "[See here](https://huggingface.co/learn/deep-rl-course/unit7/hands-on#step-2-understand-ma-poca)\n",
+    "\n",
+    "![](https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit10/mapoca.png)\n",
+    "\n",
+    "This uses the MA-POCA trainer (or 'coach'; called `poca` in the config), which helps train cooperative behaviour."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "af92b8ec-2fac-4118-8fc5-ef1e8b68bf6b",
+   "metadata": {},
+   "source": [
+    "## Step 3: Define the config file\n",
+    "\n",
+    "**`./ml-agents/config/poca/SoccerTwos.yaml`:**\n",
+    "\n",
+    "```yaml\n",
+    "behaviors:\n",
+    "  SoccerTwos:\n",
+    "    trainer_type: poca\n",
+    "    hyperparameters:\n",
+    "      batch_size: 2048\n",
+    "      buffer_size: 20480\n",
+    "      learning_rate: 0.0003\n",
+    "      beta: 0.005\n",
+    "      epsilon: 0.2\n",
+    "      lambd: 0.95\n",
+    "      num_epoch: 3\n",
+    "      learning_rate_schedule: constant\n",
+    "    network_settings:\n",
+    "      normalize: false\n",
+    "      hidden_units: 512\n",
+    "      num_layers: 2\n",
+    "      vis_encode_type: simple\n",
+    "    reward_signals:\n",
+    "      extrinsic:\n",
+    "        gamma: 0.99\n",
+    "        strength: 1.0\n",
+    "    keep_checkpoints: 5\n",
+    "    max_steps: 5000000\n",
+    "    time_horizon: 1000\n",
+    "    summary_freq: 10000\n",
+    "    self_play:\n",
+    "      save_steps: 50000\n",
+    "      team_change: 200000\n",
+    "      swap_steps: 2000\n",
+    "      window: 10\n",
+    "      play_against_latest_model_ratio: 0.5\n",
+    "      initial_elo: 1200.0\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6e7ff86a-1f86-47a2-941e-3e80e21c9e57",
+   "metadata": {},
+   "source": [
+    "## Step 4: Start the training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "795032b7-978e-4822-b680-a42fc8bc216f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home/jakewalker/Projects/deeprl-course/.venv/lib/python3.10/site-packages/torch/__init__.py:614: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n",
+      "  _C._set_default_tensor_type(t)\n",
+      "\n",
+      "            ┐  ╖\n",
+      "        ╓╖╬│╡  ││╬╖╖\n",
+      "    ╓╖╬│││││┘  ╬│││││╬╖\n",
+      " ╖╬│││││╬╜        ╙╬│││││╖╖                               ╗╗╗\n",
+      " ╬╬╬╬╖││╦╖        ╖╬││╗╣╣╣╬      ╟╣╣╬    ╟╣╣╣             ╜╜╜  ╟╣╣\n",
+      " ╬╬╬╬╬╬╬╬╖│╬╖╖╓╬╪│╓╣╣╣╣╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╒╣╣╖╗╣╣╣╗   ╣╣╣ ╣╣╣╣╣╣ ╟╣╣╖   ╣╣╣\n",
+      " ╬╬╬╬┐  ╙╬╬╬╬│╓╣╣╣╝╜  ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╣╙ ╙╣╣╣  ╣╣╣ ╙╟╣╣╜╙  ╫╣╣  ╟╣╣\n",
+      " ╬╬╬╬┐     ╙╬╬╣╣      ╫╣╣╣╬      ╟╣╣╬    ╟╣╣╣ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣     ╣╣╣┌╣╣╜\n",
+      " ╬╬╬╜       ╬╬╣╣      ╙╝╣╣╬      ╙╣╣╣╗╖╓╗╣╣╣╜ ╟╣╣╬   ╣╣╣  ╣╣╣  ╟╣╣╦╓    ╣╣╣╣╣\n",
+      " ╙   ╓╦╖    ╬╬╣╣   ╓╗╗╖            ╙╝╣╣╣╣╝╜   ╘╝╝╜   ╝╝╝  ╝╝╝   ╙╣╣╣    ╟╣╣╣\n",
+      "   ╩╬╬╬╬╬╬╦╦╬╬╣╣╗╣╣╣╣╣╣╣╝                                             ╫╣╣╣╣\n",
+      "      ╙╬╬╬╬╬╬╬╣╣╣╣╣╣╝╜\n",
+      "          ╙╬╬╬╣╣╣╜\n",
+      "             ╙\n",
+      "        \n",
+      " Version information:\n",
+      "  ml-agents: 1.1.0.dev0,\n",
+      "  ml-agents-envs: 1.1.0.dev0,\n",
+      "  Communicator API: 1.5.0,\n",
+      "  PyTorch: 2.1.0+cu121\n",
+      "[INFO] Connected to Unity environment with package version 2.3.0-exp.3 and communication version 1.5.0\n",
+      "[INFO] Connected new brain: SoccerTwos?team=1\n",
+      "[INFO] Connected new brain: SoccerTwos?team=0\n",
+      "[WARNING] Deleting TensorBoard data events.out.tfevents.1698236060.adminuser-System-Product-Name.10733.0 that was left over from a previous run.\n",
+      "[INFO] Hyperparameters for behavior name SoccerTwos: \n",
+      "\ttrainer_type:\tpoca\n",
+      "\thyperparameters:\t\n",
+      "\t  batch_size:\t2048\n",
+      "\t  buffer_size:\t204800\n",
+      "\t  learning_rate:\t0.0003\n",
+      "\t  beta:\t0.005\n",
+      "\t  epsilon:\t0.2\n",
+      "\t  lambd:\t0.95\n",
+      "\t  num_epoch:\t3\n",
+      "\t  learning_rate_schedule:\tconstant\n",
+      "\t  beta_schedule:\tconstant\n",
+      "\t  epsilon_schedule:\tconstant\n",
+      "\tcheckpoint_interval:\t500000\n",
+      "\tnetwork_settings:\t\n",
+      "\t  normalize:\tFalse\n",
+      "\t  hidden_units:\t512\n",
+      "\t  num_layers:\t3\n",
+      "\t  vis_encode_type:\tsimple\n",
+      "\t  memory:\tNone\n",
+      "\t  goal_conditioning_type:\thyper\n",
+      "\t  deterministic:\tFalse\n",
+      "\treward_signals:\t\n",
+      "\t  extrinsic:\t\n",
+      "\t    gamma:\t0.99\n",
+      "\t    strength:\t1.0\n",
+      "\t    network_settings:\t\n",
+      "\t      normalize:\tFalse\n",
+      "\t      hidden_units:\t128\n",
+      "\t      num_layers:\t2\n",
+      "\t      vis_encode_type:\tsimple\n",
+      "\t      memory:\tNone\n",
+      "\t      goal_conditioning_type:\thyper\n",
+      "\t      deterministic:\tFalse\n",
+      "\tinit_path:\tNone\n",
+      "\tkeep_checkpoints:\t5\n",
+      "\teven_checkpoints:\tFalse\n",
+      "\tmax_steps:\t5000000\n",
+      "\ttime_horizon:\t1000\n",
+      "\tsummary_freq:\t10000\n",
+      "\tthreaded:\tFalse\n",
+      "\tself_play:\t\n",
+      "\t  save_steps:\t50000\n",
+      "\t  team_change:\t200000\n",
+      "\t  swap_steps:\t2000\n",
+      "\t  window:\t10\n",
+      "\t  play_against_latest_model_ratio:\t0.5\n",
+      "\t  initial_elo:\t1200.0\n",
+      "\tbehavioral_cloning:\tNone\n",
+      "/home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents/mlagents/trainers/torch_entities/utils.py:289: UserWarning: The use of `x.T` on tensors of dimension other than 2 to reverse their shape is deprecated and it will throw an error in a future release. Consider `x.mT` to transpose batches of matrices or `x.permute(*torch.arange(x.ndim - 1, -1, -1))` to reverse the dimensions of a tensor. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3614.)\n",
+      "  torch.nn.functional.one_hot(_act.T, action_size[i]).float()\n",
+      "[INFO] SoccerTwos. Step: 10000. Time Elapsed: 20.758 s. Mean Reward: 0.000. Mean Group Reward: 0.026. Training. ELO: 1201.067.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!chmod +x ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64\n",
+    "!mlagents-learn ./ml-agents/config/poca/SoccerTwos.yaml --env=./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=\"SoccerTwos\" --no-graphics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ee052048-4fe5-4c54-a01b-7c9e1c5baac4",
+   "metadata": {},
+   "source": [
+    "## Step 5: Push the agent to the Hugging Face Hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e037b3bf-3160-448e-a4ca-2a6d95c463ec",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[INFO] This function will create a model card and upload your SoccerTwos into HuggingFace Hub. This is a work in progress: If you encounter a bug, please send open an issue\n",
+      "[INFO] Pushing repo SoccerTwos to the Hugging Face Hub\n",
+      "SoccerTwos-3499096.onnx:   0%|                      | 0.00/1.77M [00:00<?, ?B/s]\n",
+      "SoccerTwos-3499096.pt:   0%|                        | 0.00/28.4M [00:00<?, ?B/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:   0%|                      | 0.00/1.77M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   0%|                        | 0.00/28.4M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "Upload 13 LFS files:   0%|                               | 0/13 [00:00<?, ?it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3499096.onnx:   1%|             | 16.4k/1.77M [00:00<00:54, 32.4kB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx:   1%|             | 16.4k/1.77M [00:00<00:55, 31.8kB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:   0%|               | 16.4k/28.4M [00:00<15:51, 29.9kB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   0%|               | 16.4k/28.4M [00:00<15:23, 30.8kB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-3499096.onnx:  77%|█████████▉   | 1.36M/1.77M [00:00<00:00, 2.87MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx:  22%|███▎           | 393k/1.77M [00:00<00:01, 843kB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   1%|▏                | 246k/28.4M [00:00<00:59, 477kB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:   1%|▏                | 246k/28.4M [00:00<01:00, 466kB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  19%|██▊            | 328k/1.77M [00:00<00:02, 567kB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx:  35%|████▉         | 623k/1.77M [00:00<00:00, 1.15MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   2%|▎                | 508k/28.4M [00:00<00:29, 934kB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:   2%|▎                | 508k/28.4M [00:00<00:32, 852kB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   3%|▍               | 754k/28.4M [00:00<00:23, 1.16MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  33%|█████          | 590k/1.77M [00:00<00:01, 814kB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx:  60%|███████▊     | 1.06M/1.77M [00:00<00:00, 1.64MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.onnx: 100%|█████████████| 1.77M/1.77M [00:00<00:00, 1.78MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx:  93%|████████████ | 1.64M/1.77M [00:01<00:00, 2.49MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   4%|▋              | 1.28M/28.4M [00:01<00:13, 2.02MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:   4%|▋              | 1.28M/28.4M [00:01<00:13, 1.97MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  48%|██████▋       | 852k/1.77M [00:01<00:00, 1.07MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:   7%|█              | 2.05M/28.4M [00:01<00:08, 3.27MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:   7%|█              | 2.11M/28.4M [00:01<00:07, 3.34MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  69%|█████████    | 1.23M/1.77M [00:01<00:00, 1.45MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  11%|█▌             | 3.01M/28.4M [00:01<00:05, 4.68MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  11%|█▋             | 3.08M/28.4M [00:01<00:05, 4.62MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.onnx: 100%|█████████████| 1.77M/1.77M [00:01<00:00, 1.35MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  84%|██████████▉  | 1.49M/1.77M [00:01<00:00, 1.64MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  13%|█▉             | 3.78M/28.4M [00:01<00:04, 5.32MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:   5%|▊              | 1.52M/28.4M [00:00<00:02, 11.4MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3999170.onnx:  99%|████████████▉| 1.75M/1.77M [00:01<00:00, 1.73MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:   9%|█▍             | 2.67M/28.4M [00:00<00:02, 11.4MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  17%|██▌            | 4.88M/28.4M [00:01<00:04, 5.63MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  18%|██▊            | 5.26M/28.4M [00:01<00:04, 5.69MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos.onnx:   0%|                              | 0.00/1.77M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  13%|██             | 3.82M/28.4M [00:00<00:02, 9.81MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.onnx: 100%|██████████████| 1.77M/1.77M [00:01<00:00, 992kB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  17%|██▌            | 4.82M/28.4M [00:00<00:02, 9.72MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  23%|███▍           | 6.55M/28.4M [00:01<00:03, 6.23MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  25%|███▋           | 7.00M/28.4M [00:01<00:03, 6.16MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  20%|███            | 5.80M/28.4M [00:00<00:02, 8.50MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  25%|███▊           | 7.19M/28.4M [00:01<00:03, 5.82MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  27%|████           | 7.70M/28.4M [00:01<00:03, 6.21MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  24%|███▌           | 6.68M/28.4M [00:00<00:02, 7.90MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  30%|████▍          | 8.40M/28.4M [00:02<00:03, 5.59MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   0%|                        | 0.00/28.4M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
+      "SoccerTwos.onnx: 100%|█████████████████████| 1.77M/1.77M [00:00<00:00, 3.23MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-3499096.pt:  32%|████▊          | 9.13M/28.4M [00:02<00:03, 5.59MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  29%|████▍          | 8.31M/28.4M [00:01<00:02, 6.93MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   1%|▏               | 295k/28.4M [00:00<00:15, 1.78MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  34%|█████          | 9.70M/28.4M [00:02<00:03, 5.26MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  35%|█████▎         | 10.0M/28.4M [00:02<00:03, 5.75MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  33%|████▉          | 9.42M/28.4M [00:01<00:02, 7.37MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos.onnx:   0%|                              | 0.00/1.77M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  36%|█████▍         | 10.3M/28.4M [00:02<00:03, 5.34MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  37%|█████▌         | 10.5M/28.4M [00:01<00:02, 7.21MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   3%|▍               | 885k/28.4M [00:00<00:15, 1.78MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos.onnx:  51%|███████████▏          | 901k/1.77M [00:00<00:00, 5.22MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  39%|█████▊         | 11.0M/28.4M [00:02<00:03, 4.97MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  39%|█████▉         | 11.2M/28.4M [00:01<00:02, 6.78MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   4%|▌              | 1.15M/28.4M [00:00<00:14, 1.85MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  42%|██████▎        | 11.9M/28.4M [00:01<00:02, 6.31MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  41%|██████▏        | 11.7M/28.4M [00:02<00:03, 4.48MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  43%|██████▍        | 12.2M/28.4M [00:02<00:03, 4.86MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   5%|▋              | 1.39M/28.4M [00:00<00:15, 1.73MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  44%|██████▌        | 12.4M/28.4M [00:03<00:03, 4.71MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  45%|██████▊        | 12.9M/28.4M [00:01<00:02, 6.09MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   6%|▊              | 1.65M/28.4M [00:00<00:14, 1.81MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  46%|██████▉        | 13.2M/28.4M [00:03<00:02, 5.26MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos.onnx: 100%|█████████████████████| 1.77M/1.77M [00:00<00:00, 2.46MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-3499096.pt:  49%|███████▎       | 13.8M/28.4M [00:03<00:02, 5.22MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  49%|███████▍       | 14.1M/28.4M [00:01<00:02, 5.84MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  51%|███████▌       | 14.4M/28.4M [00:03<00:02, 5.39MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  51%|███████▋       | 14.6M/28.4M [00:03<00:02, 5.91MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:   9%|█▎             | 2.42M/28.4M [00:01<00:12, 2.12MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  53%|███████▉       | 15.1M/28.4M [00:02<00:02, 5.94MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:   0%|                        | 0.00/28.4M [00:00<?, ?B/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  54%|████████▏      | 15.5M/28.4M [00:03<00:02, 6.00MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  10%|█▍             | 2.75M/28.4M [00:01<00:11, 2.25MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  55%|████████▎      | 15.7M/28.4M [00:03<00:02, 4.87MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:   4%|▌              | 1.15M/28.4M [00:00<00:03, 7.64MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  11%|█▌             | 3.00M/28.4M [00:01<00:11, 2.18MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:   7%|█              | 2.11M/28.4M [00:00<00:03, 7.15MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  11%|█▋             | 3.26M/28.4M [00:01<00:12, 2.06MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  12%|█▊             | 3.51M/28.4M [00:00<00:02, 9.62MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  57%|████████▌      | 16.1M/28.4M [00:03<00:03, 3.60MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  13%|█▉             | 3.65M/28.4M [00:01<00:10, 2.42MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  19%|██▉            | 5.49M/28.4M [00:00<00:01, 13.1MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  60%|█████████      | 17.1M/28.4M [00:03<00:02, 4.69MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  56%|████████▍      | 16.0M/28.4M [00:02<00:03, 3.31MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  14%|██             | 3.92M/28.4M [00:01<00:10, 2.44MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  25%|███▋           | 7.00M/28.4M [00:00<00:01, 11.7MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  59%|████████▉      | 16.9M/28.4M [00:04<00:03, 3.31MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  62%|█████████▎     | 17.8M/28.4M [00:04<00:02, 4.51MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  62%|█████████▏     | 17.5M/28.4M [00:02<00:02, 4.29MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  32%|████▊          | 9.06M/28.4M [00:00<00:01, 12.4MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  61%|█████████      | 17.3M/28.4M [00:04<00:03, 3.35MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  65%|█████████▋     | 18.4M/28.4M [00:04<00:02, 4.20MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  16%|██▎            | 4.49M/28.4M [00:02<00:11, 2.11MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  64%|█████████▌     | 18.1M/28.4M [00:03<00:02, 4.10MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  62%|█████████▎     | 17.7M/28.4M [00:04<00:03, 3.31MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  67%|█████████▉     | 18.9M/28.4M [00:04<00:02, 4.35MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  17%|██▍            | 4.73M/28.4M [00:02<00:12, 1.95MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  42%|██████▎        | 11.9M/28.4M [00:01<00:01, 11.2MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  65%|█████████▊     | 18.6M/28.4M [00:03<00:02, 3.90MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  68%|██████████▏    | 19.4M/28.4M [00:04<00:02, 3.49MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  18%|██▋            | 5.00M/28.4M [00:02<00:12, 1.82MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  65%|█████████▋     | 18.4M/28.4M [00:04<00:03, 2.84MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  67%|██████████     | 19.1M/28.4M [00:03<00:02, 3.50MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  66%|█████████▉     | 18.8M/28.4M [00:04<00:03, 2.82MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  19%|██▊            | 5.32M/28.4M [00:02<00:12, 1.86MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  53%|███████▉       | 15.0M/28.4M [00:01<00:01, 11.0MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  69%|██████████▎    | 19.6M/28.4M [00:03<00:02, 3.48MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  67%|██████████     | 19.1M/28.4M [00:04<00:03, 2.51MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  20%|██▉            | 5.57M/28.4M [00:02<00:12, 1.78MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  71%|██████████▌    | 20.1M/28.4M [00:03<00:02, 3.29MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  68%|██████████▏    | 19.3M/28.4M [00:05<00:03, 2.41MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  21%|███            | 5.90M/28.4M [00:02<00:12, 1.86MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  73%|██████████▉    | 20.6M/28.4M [00:03<00:02, 3.41MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  57%|████████▌      | 16.2M/28.4M [00:01<00:01, 6.36MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  22%|███▎           | 6.16M/28.4M [00:03<00:11, 1.97MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  78%|███████████▋   | 22.1M/28.4M [00:05<00:01, 4.25MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  74%|███████████▏   | 21.2M/28.4M [00:04<00:02, 3.29MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  23%|███▍           | 6.41M/28.4M [00:03<00:11, 1.94MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  66%|█████████▊     | 18.7M/28.4M [00:01<00:01, 8.12MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  71%|██████████▋    | 20.2M/28.4M [00:05<00:03, 2.33MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  80%|████████████   | 22.8M/28.4M [00:05<00:01, 3.95MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  23%|███▍           | 6.62M/28.4M [00:03<00:10, 1.98MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  72%|██████████▊    | 20.6M/28.4M [00:05<00:03, 2.49MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  24%|███▌           | 6.83M/28.4M [00:03<00:11, 1.93MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  76%|███████████▍   | 21.7M/28.4M [00:04<00:02, 2.89MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  82%|████████████▎  | 23.4M/28.4M [00:05<00:01, 3.80MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  74%|███████████    | 20.9M/28.4M [00:05<00:02, 2.54MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  25%|███▋           | 7.06M/28.4M [00:03<00:11, 1.90MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  74%|███████████    | 21.0M/28.4M [00:02<00:00, 8.04MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  26%|███▊           | 7.31M/28.4M [00:03<00:10, 1.98MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  78%|███████████▋   | 22.2M/28.4M [00:04<00:02, 2.73MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  78%|███████████▋   | 22.0M/28.4M [00:02<00:00, 7.40MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  76%|███████████▍   | 21.6M/28.4M [00:05<00:02, 2.80MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  27%|████           | 7.70M/28.4M [00:03<00:09, 2.23MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  80%|████████████   | 22.8M/28.4M [00:04<00:02, 2.75MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  28%|████▏          | 7.96M/28.4M [00:03<00:08, 2.30MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  83%|████████████▍  | 23.5M/28.4M [00:02<00:00, 7.64MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  78%|███████████▋   | 22.2M/28.4M [00:06<00:02, 2.99MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  82%|████████████▎  | 23.3M/28.4M [00:04<00:01, 2.85MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  29%|████▍          | 8.34M/28.4M [00:04<00:08, 2.38MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  87%|█████████████  | 24.8M/28.4M [00:02<00:00, 7.87MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  80%|████████████   | 22.8M/28.4M [00:06<00:01, 3.21MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  91%|█████████████▋ | 25.9M/28.4M [00:06<00:00, 3.93MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  84%|████████████▌  | 23.8M/28.4M [00:05<00:01, 2.85MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  92%|█████████████▊ | 26.1M/28.4M [00:02<00:00, 7.79MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  83%|████████████▍  | 23.5M/28.4M [00:06<00:01, 3.37MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  32%|████▊          | 9.04M/28.4M [00:04<00:07, 2.63MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4499376.pt:  85%|████████████▊  | 24.3M/28.4M [00:05<00:01, 2.88MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  84%|████████████▌  | 23.8M/28.4M [00:06<00:01, 3.34MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  33%|████▉          | 9.32M/28.4M [00:04<00:07, 2.67MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt:  96%|██████████████▍| 27.4M/28.4M [00:03<00:00, 7.63MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  85%|████████████▊  | 24.2M/28.4M [00:06<00:01, 3.32MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  96%|██████████████▍| 27.3M/28.4M [00:06<00:00, 3.65MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  34%|█████          | 9.60M/28.4M [00:04<00:07, 2.62MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  88%|█████████████▏ | 24.9M/28.4M [00:05<00:01, 2.78MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  86%|████████████▉  | 24.5M/28.4M [00:06<00:01, 3.14MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  35%|█████▏         | 9.88M/28.4M [00:04<00:07, 2.57MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt:  98%|██████████████▋| 27.9M/28.4M [00:06<00:00, 3.58MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  87%|█████████████  | 24.9M/28.4M [00:06<00:01, 2.96MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  89%|█████████████▍ | 25.4M/28.4M [00:05<00:01, 2.81MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-3499096.pt: 100%|██████████████▉| 28.3M/28.4M [00:07<00:00, 3.51MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  37%|█████▌         | 10.5M/28.4M [00:04<00:06, 2.66MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-5000290.pt: 100%|███████████████| 28.4M/28.4M [00:03<00:00, 7.77MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  39%|█████▊         | 11.0M/28.4M [00:05<00:05, 2.97MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3499096.pt: 100%|███████████████| 28.4M/28.4M [00:07<00:00, 3.90MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  95%|██████████████▏| 26.9M/28.4M [00:06<00:00, 3.66MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  94%|██████████████▏| 26.8M/28.4M [00:07<00:00, 4.16MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "checkpoint.pt:   0%|                                | 0.00/28.4M [00:00<?, ?B/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  97%|██████████████▍| 27.5M/28.4M [00:06<00:00, 3.57MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:   7%|█▌                     | 1.87M/28.4M [00:00<00:01, 13.7MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  97%|██████████████▌| 27.6M/28.4M [00:07<00:00, 4.06MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt:  98%|██████████████▊| 28.0M/28.4M [00:06<00:00, 3.49MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  45%|██████▊        | 12.8M/28.4M [00:05<00:04, 3.51MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "events.out.tfevents.1698227228.adminuser-System-Product-Name.7350.0:   0%| | 0.0\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "Upload 13 LFS files:  15%|███▌                   | 2/13 [00:07<00:46,  4.27s/it]\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt:  99%|██████████████▊| 28.0M/28.4M [00:07<00:00, 3.94MB/s]\u001b[A\u001b[A\u001b[A\n",
+      "checkpoint.pt:  11%|██▌                    | 3.24M/28.4M [00:00<00:02, 8.95MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  46%|██████▉        | 13.2M/28.4M [00:05<00:04, 3.40MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  15%|███▍                   | 4.21M/28.4M [00:00<00:02, 8.08MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "events.out.tfevents.1698227228.adminuser-System-Product-Name.7350.0:  42%|▍| 819\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  47%|███████        | 13.5M/28.4M [00:05<00:04, 3.08MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  18%|████                   | 5.05M/28.4M [00:00<00:02, 8.15MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "events.out.tfevents.1698227228.adminuser-System-Product-Name.7350.0:  65%|▋| 1.2\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  49%|███████▎       | 13.9M/28.4M [00:05<00:04, 3.17MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "SoccerTwos-3999170.pt: 100%|███████████████| 28.4M/28.4M [00:08<00:00, 3.51MB/s]\u001b[A\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "\n",
+      "checkpoint.pt:  21%|████▊                  | 5.98M/28.4M [00:00<00:03, 6.95MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4499376.pt: 100%|███████████████| 28.4M/28.4M [00:06<00:00, 4.16MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  52%|███████▊       | 14.9M/28.4M [00:06<00:03, 3.74MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  26%|██████                 | 7.52M/28.4M [00:00<00:02, 7.84MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos.onnx:   0%|                              | 0.00/1.77M [00:00<?, ?B/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  31%|███████▏               | 8.86M/28.4M [00:01<00:02, 8.60MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "\n",
+      "events.out.tfevents.1698227228.adminuser-System-Product-Name.7350.0: 100%|█| 1.9\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "SoccerTwos.onnx:  83%|█████████████████▌   | 1.47M/1.77M [00:00<00:00, 13.3MB/s]\n",
+      "checkpoint.pt:  36%|████████▎              | 10.3M/28.4M [00:01<00:01, 9.53MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  56%|████████▍      | 16.0M/28.4M [00:06<00:04, 3.01MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  41%|█████████▍             | 11.6M/28.4M [00:01<00:01, 9.54MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  59%|████████▊      | 16.7M/28.4M [00:06<00:03, 3.70MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  45%|██████████▎            | 12.8M/28.4M [00:01<00:01, 9.97MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  61%|█████████▏     | 17.4M/28.4M [00:06<00:02, 4.09MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos.onnx: 100%|█████████████████████| 1.77M/1.77M [00:00<00:00, 3.15MB/s]\u001b[A\n",
+      "\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  64%|█████████▌     | 18.2M/28.4M [00:06<00:02, 4.75MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  54%|████████████▎          | 15.2M/28.4M [00:01<00:01, 9.94MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  66%|█████████▉     | 18.9M/28.4M [00:06<00:01, 5.09MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  69%|██████████▍    | 19.7M/28.4M [00:07<00:01, 5.61MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  57%|█████████████▏         | 16.3M/28.4M [00:01<00:01, 6.97MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  72%|██████████▊    | 20.6M/28.4M [00:07<00:01, 6.02MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  65%|██████████████▉        | 18.5M/28.4M [00:02<00:01, 9.43MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  75%|███████████▏   | 21.2M/28.4M [00:07<00:01, 5.96MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  70%|████████████████▏      | 20.0M/28.4M [00:02<00:00, 9.67MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  77%|███████████▌   | 21.8M/28.4M [00:07<00:01, 5.96MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  79%|███████████▊   | 22.5M/28.4M [00:07<00:00, 6.02MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  75%|█████████████████▎     | 21.4M/28.4M [00:02<00:00, 9.79MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  82%|████████████▎  | 23.3M/28.4M [00:07<00:00, 6.15MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  80%|██████████████████▍    | 22.8M/28.4M [00:02<00:00, 10.1MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  85%|████████████▋  | 24.0M/28.4M [00:07<00:00, 6.41MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  85%|███████████████████▌   | 24.2M/28.4M [00:02<00:00, 10.6MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  87%|█████████████  | 24.8M/28.4M [00:07<00:00, 6.45MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  90%|████████████████████▋  | 25.6M/28.4M [00:02<00:00, 10.7MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  90%|█████████████▌ | 25.7M/28.4M [00:07<00:00, 6.66MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  95%|█████████████████████▊ | 26.9M/28.4M [00:02<00:00, 10.9MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  93%|█████████████▉ | 26.5M/28.4M [00:08<00:00, 6.71MB/s]\u001b[A\u001b[A\n",
+      "checkpoint.pt:  99%|██████████████████████▉| 28.3M/28.4M [00:02<00:00, 10.9MB/s]\u001b[A\n",
+      "\n",
+      "SoccerTwos-4999290.pt:  96%|██████████████▍| 27.3M/28.4M [00:08<00:00, 6.83MB/s]\u001b[A\u001b[A\n",
+      "\n",
+      "checkpoint.pt: 100%|███████████████████████| 28.4M/28.4M [00:03<00:00, 8.74MB/s]\u001b[A\u001b[A\n",
+      "SoccerTwos-4999290.pt: 100%|███████████████| 28.4M/28.4M [00:08<00:00, 3.31MB/s]\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "Upload 13 LFS files: 100%|██████████████████████| 13/13 [00:10<00:00,  1.18it/s]\u001b[A\u001b[A\u001b[A\u001b[A\n",
+      "[INFO] Your model is pushed to the hub. You can view your model here: https://huggingface.co/jake-walker/poca-SoccerTwos\n"
+     ]
+    }
+   ],
+   "source": [
+    "!mlagents-push-to-hf --run-id=\"SoccerTwos\" --local-dir=\"./results/SoccerTwos\" --repo-id=\"jake-walker/poca-SoccerTwos\" --commit-message=\"Initial commit\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3db69d1-5a8a-45e7-aae5-1eb35095d202",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}